// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2018 Red Hat, Inc.
 *
 * This is a test "dust" device, which fails reads on specified
 * sectors, emulating the behavior of a hard disk drive sending
 * a "Read Medium Error" sense.
 *
 */

#include <linux/device-mapper.h>
#include <linux/module.h>
#include <linux/rbtree.h>

#define DM_MSG_PREFIX "dust"

struct badblock {
	struct rb_node node;
	sector_t bb;
	unsigned char wr_fail_cnt;
};

struct dust_device {
	struct dm_dev *dev;
	struct rb_root badblocklist;
	unsigned long long badblock_count;
	spinlock_t dust_lock;
	unsigned int blksz;
	int sect_per_block_shift;
	unsigned int sect_per_block;
	sector_t start;
	bool fail_read_on_bb:1;
	bool quiet_mode:1;
};

static struct badblock *dust_rb_search(struct rb_root *root, sector_t blk)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct badblock *bblk = rb_entry(node, struct badblock, node);

		if (bblk->bb > blk)
			node = node->rb_left;
		else if (bblk->bb < blk)
			node = node->rb_right;
		else
			return bblk;
	}

	return NULL;
}

static bool dust_rb_insert(struct rb_root *root, struct badblock *new)
{
	struct badblock *bblk;
	struct rb_node **link = &root->rb_node, *parent = NULL;
	sector_t value = new->bb;

	while (*link) {
		parent = *link;
		bblk = rb_entry(parent, struct badblock, node);

		if (bblk->bb > value)
			link = &(*link)->rb_left;
		else if (bblk->bb < value)
			link = &(*link)->rb_right;
		else
			return false;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);

	return true;
}

static int dust_remove_block(struct dust_device *dd, unsigned long long block)
{
	struct badblock *bblock;
	unsigned long flags;

	spin_lock_irqsave(&dd->dust_lock, flags);
	bblock = dust_rb_search(&dd->badblocklist, block);

	if (bblock == NULL) {
		if (!dd->quiet_mode) {
			DMERR("%s: block %llu not found in badblocklist",
			      __func__, block);
		}
		spin_unlock_irqrestore(&dd->dust_lock, flags);
		return -EINVAL;
	}

	rb_erase(&bblock->node, &dd->badblocklist);
	dd->badblock_count--;
	if (!dd->quiet_mode)
		DMINFO("%s: badblock removed at block %llu", __func__, block);
	kfree(bblock);
	spin_unlock_irqrestore(&dd->dust_lock, flags);

	return 0;
}

static int dust_add_block(struct dust_device *dd, unsigned long long block,
			  unsigned char wr_fail_cnt)
{
	struct badblock *bblock;
	unsigned long flags;

	bblock = kmalloc(sizeof(*bblock), GFP_KERNEL);
	if (bblock == NULL) {
		if (!dd->quiet_mode)
			DMERR("%s: badblock allocation failed", __func__);
		return -ENOMEM;
	}

	spin_lock_irqsave(&dd->dust_lock, flags);
	bblock->bb = block;
	bblock->wr_fail_cnt = wr_fail_cnt;
	if (!dust_rb_insert(&dd->badblocklist, bblock)) {
		if (!dd->quiet_mode) {
			DMERR("%s: block %llu already in badblocklist",
			      __func__, block);
		}
		spin_unlock_irqrestore(&dd->dust_lock, flags);
		kfree(bblock);
		return -EINVAL;
	}

	dd->badblock_count++;
	if (!dd->quiet_mode) {
		DMINFO("%s: badblock added at block %llu with write fail count %u",
		       __func__, block, wr_fail_cnt);
	}
	spin_unlock_irqrestore(&dd->dust_lock, flags);

	return 0;
}

static int dust_query_block(struct dust_device *dd, unsigned long long block, char *result,
			    unsigned int maxlen, unsigned int *sz_ptr)
{
	struct badblock *bblock;
	unsigned long flags;
	unsigned int sz = *sz_ptr;

	spin_lock_irqsave(&dd->dust_lock, flags);
	bblock = dust_rb_search(&dd->badblocklist, block);
	if (bblock != NULL)
		DMEMIT("%s: block %llu found in badblocklist", __func__, block);
	else
		DMEMIT("%s: block %llu not found in badblocklist", __func__, block);
	spin_unlock_irqrestore(&dd->dust_lock, flags);

	return 1;
}

static int __dust_map_read(struct dust_device *dd, sector_t thisblock)
{
	struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);

	if (bblk)
		return DM_MAPIO_KILL;

	return DM_MAPIO_REMAPPED;
}

static int dust_map_read(struct dust_device *dd, sector_t thisblock,
			 bool fail_read_on_bb)
{
	unsigned long flags;
	int r = DM_MAPIO_REMAPPED;

	if (fail_read_on_bb) {
		thisblock >>= dd->sect_per_block_shift;
		spin_lock_irqsave(&dd->dust_lock, flags);
		r = __dust_map_read(dd, thisblock);
		spin_unlock_irqrestore(&dd->dust_lock, flags);
	}

	return r;
}

static int __dust_map_write(struct dust_device *dd, sector_t thisblock)
{
	struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);

	if (bblk && bblk->wr_fail_cnt > 0) {
		bblk->wr_fail_cnt--;
		return DM_MAPIO_KILL;
	}

	if (bblk) {
		rb_erase(&bblk->node, &dd->badblocklist);
		dd->badblock_count--;
		kfree(bblk);
		if (!dd->quiet_mode) {
			sector_div(thisblock, dd->sect_per_block);
			DMINFO("block %llu removed from badblocklist by write",
			       (unsigned long long)thisblock);
		}
	}

	return DM_MAPIO_REMAPPED;
}

static int dust_map_write(struct dust_device *dd, sector_t thisblock,
			  bool fail_read_on_bb)
{
	unsigned long flags;
	int r = DM_MAPIO_REMAPPED;

	if (fail_read_on_bb) {
		thisblock >>= dd->sect_per_block_shift;
		spin_lock_irqsave(&dd->dust_lock, flags);
		r = __dust_map_write(dd, thisblock);
		spin_unlock_irqrestore(&dd->dust_lock, flags);
	}

	return r;
}

static int dust_map(struct dm_target *ti, struct bio *bio)
{
	struct dust_device *dd = ti->private;
	int r;

	bio_set_dev(bio, dd->dev->bdev);
	bio->bi_iter.bi_sector = dd->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	if (bio_data_dir(bio) == READ)
		r = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
	else
		r = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);

	return r;
}

static bool __dust_clear_badblocks(struct rb_root *tree,
				   unsigned long long count)
{
	struct rb_node *node = NULL, *nnode = NULL;

	nnode = rb_first(tree);
	if (nnode == NULL) {
		BUG_ON(count != 0);
		return false;
	}

	while (nnode) {
		node = nnode;
		nnode = rb_next(node);
		rb_erase(node, tree);
		count--;
		kfree(node);
	}
	BUG_ON(count != 0);
	BUG_ON(tree->rb_node != NULL);

	return true;
}

static int dust_clear_badblocks(struct dust_device *dd, char *result, unsigned int maxlen,
				unsigned int *sz_ptr)
{
	unsigned long flags;
	struct rb_root badblocklist;
	unsigned long long badblock_count;
	unsigned int sz = *sz_ptr;

	spin_lock_irqsave(&dd->dust_lock, flags);
	badblocklist = dd->badblocklist;
	badblock_count = dd->badblock_count;
	dd->badblocklist = RB_ROOT;
	dd->badblock_count = 0;
	spin_unlock_irqrestore(&dd->dust_lock, flags);

	if (!__dust_clear_badblocks(&badblocklist, badblock_count))
		DMEMIT("%s: no badblocks found", __func__);
	else
		DMEMIT("%s: badblocks cleared", __func__);

	return 1;
}

static int dust_list_badblocks(struct dust_device *dd, char *result, unsigned int maxlen,
				unsigned int *sz_ptr)
{
	unsigned long flags;
	struct rb_root badblocklist;
	struct rb_node *node;
	struct badblock *bblk;
	unsigned int sz = *sz_ptr;
	unsigned long long num = 0;

	spin_lock_irqsave(&dd->dust_lock, flags);
	badblocklist = dd->badblocklist;
	for (node = rb_first(&badblocklist); node; node = rb_next(node)) {
		bblk = rb_entry(node, struct badblock, node);
		DMEMIT("%llu\n", bblk->bb);
		num++;
	}

	spin_unlock_irqrestore(&dd->dust_lock, flags);
	if (!num)
		DMEMIT("No blocks in badblocklist");

	return 1;
}

/*
 * Target parameters:
 *
 * <device_path> <offset> <blksz>
 *
 * device_path: path to the block device
 * offset: offset to data area from start of device_path
 * blksz: block size (minimum 512, maximum 1073741824, must be a power of 2)
 */
static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dust_device *dd;
	unsigned long long tmp;
	char dummy;
	unsigned int blksz;
	unsigned int sect_per_block;
	sector_t DUST_MAX_BLKSZ_SECTORS = 2097152;
	sector_t max_block_sectors = min(ti->len, DUST_MAX_BLKSZ_SECTORS);

	if (argc != 3) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	if (kstrtouint(argv[2], 10, &blksz) || !blksz) {
		ti->error = "Invalid block size parameter";
		return -EINVAL;
	}

	if (blksz < 512) {
		ti->error = "Block size must be at least 512";
		return -EINVAL;
	}

	if (!is_power_of_2(blksz)) {
		ti->error = "Block size must be a power of 2";
		return -EINVAL;
	}

	if (to_sector(blksz) > max_block_sectors) {
		ti->error = "Block size is too large";
		return -EINVAL;
	}

	sect_per_block = (blksz >> SECTOR_SHIFT);

	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
		ti->error = "Invalid device offset sector";
		return -EINVAL;
	}

	dd = kzalloc(sizeof(struct dust_device), GFP_KERNEL);
	if (dd == NULL) {
		ti->error = "Cannot allocate context";
		return -ENOMEM;
	}

	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dd->dev)) {
		ti->error = "Device lookup failed";
		kfree(dd);
		return -EINVAL;
	}

	dd->sect_per_block = sect_per_block;
	dd->blksz = blksz;
	dd->start = tmp;

	dd->sect_per_block_shift = __ffs(sect_per_block);

	/*
	 * Whether to fail a read on a "bad" block.
	 * Defaults to false; enabled later by message.
	 */
	dd->fail_read_on_bb = false;

	/*
	 * Initialize bad block list rbtree.
	 */
	dd->badblocklist = RB_ROOT;
	dd->badblock_count = 0;
	spin_lock_init(&dd->dust_lock);

	dd->quiet_mode = false;

	BUG_ON(dm_set_target_max_io_len(ti, dd->sect_per_block) != 0);

	ti->num_discard_bios = 1;
	ti->num_flush_bios = 1;
	ti->private = dd;

	return 0;
}

static void dust_dtr(struct dm_target *ti)
{
	struct dust_device *dd = ti->private;

	__dust_clear_badblocks(&dd->badblocklist, dd->badblock_count);
	dm_put_device(ti, dd->dev);
	kfree(dd);
}

static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
			char *result, unsigned int maxlen)
{
	struct dust_device *dd = ti->private;
	sector_t size = bdev_nr_sectors(dd->dev->bdev);
	bool invalid_msg = false;
	int r = -EINVAL;
	unsigned long long tmp, block;
	unsigned char wr_fail_cnt;
	unsigned int tmp_ui;
	unsigned long flags;
	unsigned int sz = 0;
	char dummy;

	if (argc == 1) {
		if (!strcasecmp(argv[0], "addbadblock") ||
		    !strcasecmp(argv[0], "removebadblock") ||
		    !strcasecmp(argv[0], "queryblock")) {
			DMERR("%s requires an additional argument", argv[0]);
		} else if (!strcasecmp(argv[0], "disable")) {
			DMINFO("disabling read failures on bad sectors");
			dd->fail_read_on_bb = false;
			r = 0;
		} else if (!strcasecmp(argv[0], "enable")) {
			DMINFO("enabling read failures on bad sectors");
			dd->fail_read_on_bb = true;
			r = 0;
		} else if (!strcasecmp(argv[0], "countbadblocks")) {
			spin_lock_irqsave(&dd->dust_lock, flags);
			DMEMIT("countbadblocks: %llu badblock(s) found",
			       dd->badblock_count);
			spin_unlock_irqrestore(&dd->dust_lock, flags);
			r = 1;
		} else if (!strcasecmp(argv[0], "clearbadblocks")) {
			r = dust_clear_badblocks(dd, result, maxlen, &sz);
		} else if (!strcasecmp(argv[0], "quiet")) {
			if (!dd->quiet_mode)
				dd->quiet_mode = true;
			else
				dd->quiet_mode = false;
			r = 0;
		} else if (!strcasecmp(argv[0], "listbadblocks")) {
			r = dust_list_badblocks(dd, result, maxlen, &sz);
		} else {
			invalid_msg = true;
		}
	} else if (argc == 2) {
		if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
			return r;

		block = tmp;
		sector_div(size, dd->sect_per_block);
		if (block > size) {
			DMERR("selected block value out of range");
			return r;
		}

		if (!strcasecmp(argv[0], "addbadblock"))
			r = dust_add_block(dd, block, 0);
		else if (!strcasecmp(argv[0], "removebadblock"))
			r = dust_remove_block(dd, block);
		else if (!strcasecmp(argv[0], "queryblock"))
			r = dust_query_block(dd, block, result, maxlen, &sz);
		else
			invalid_msg = true;

	} else if (argc == 3) {
		if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
			return r;

		if (sscanf(argv[2], "%u%c", &tmp_ui, &dummy) != 1)
			return r;

		block = tmp;
		if (tmp_ui > 255) {
			DMERR("selected write fail count out of range");
			return r;
		}
		wr_fail_cnt = tmp_ui;
		sector_div(size, dd->sect_per_block);
		if (block > size) {
			DMERR("selected block value out of range");
			return r;
		}

		if (!strcasecmp(argv[0], "addbadblock"))
			r = dust_add_block(dd, block, wr_fail_cnt);
		else
			invalid_msg = true;

	} else
		DMERR("invalid number of arguments '%d'", argc);

	if (invalid_msg)
		DMERR("unrecognized message '%s' received", argv[0]);

	return r;
}

static void dust_status(struct dm_target *ti, status_type_t type,
			unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct dust_device *dd = ti->private;
	unsigned int sz = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%s %s %s", dd->dev->name,
		       dd->fail_read_on_bb ? "fail_read_on_bad_block" : "bypass",
		       dd->quiet_mode ? "quiet" : "verbose");
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%s %llu %u", dd->dev->name,
		       (unsigned long long)dd->start, dd->blksz);
		break;

	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct dust_device *dd = ti->private;
	struct dm_dev *dev = dd->dev;

	*bdev = dev->bdev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (dd->start || ti->len != bdev_nr_sectors(dev->bdev))
		return 1;

	return 0;
}

static int dust_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
				void *data)
{
	struct dust_device *dd = ti->private;

	return fn(ti, dd->dev, dd->start, ti->len, data);
}

static struct target_type dust_target = {
	.name = "dust",
	.version = {1, 0, 0},
	.module = THIS_MODULE,
	.ctr = dust_ctr,
	.dtr = dust_dtr,
	.iterate_devices = dust_iterate_devices,
	.map = dust_map,
	.message = dust_message,
	.status = dust_status,
	.prepare_ioctl = dust_prepare_ioctl,
};
module_dm(dust);

MODULE_DESCRIPTION(DM_NAME " dust test target");
MODULE_AUTHOR("Bryan Gurney <dm-devel@redhat.com>");
MODULE_LICENSE("GPL"