From 5c4bd1f40c23d08ffbdccd68a5fd63751c794d89 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 3 Dec 2019 10:39:01 +0100 Subject: null_blk: fix zone size parameter check For zoned=1 mode, the zone size must be a power of 2. Check this not only when the zone size is specified during modprobe, but also when creating a zoned null_blk device using configfs. Signed-off-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 795fda576824..53ba9c7f2786 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1607,7 +1607,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) return blk_mq_alloc_tag_set(set); } -static void null_validate_conf(struct nullb_device *dev) +static int null_validate_conf(struct nullb_device *dev) { dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); @@ -1634,6 +1634,14 @@ static void null_validate_conf(struct nullb_device *dev) /* can not stop a queue */ if (dev->queue_mode == NULL_Q_BIO) dev->mbps = 0; + + if (dev->zoned && + (!dev->zone_size || !is_power_of_2(dev->zone_size))) { + pr_err("zone_size must be power-of-two\n"); + return -EINVAL; + } + + return 0; } #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION @@ -1666,7 +1674,9 @@ static int null_add_dev(struct nullb_device *dev) struct nullb *nullb; int rv; - null_validate_conf(dev); + rv = null_validate_conf(dev); + if (rv) + return rv; nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); if (!nullb) { @@ -1792,11 +1802,6 @@ static int __init null_init(void) g_bs = PAGE_SIZE; } - if (!is_power_of_2(g_zone_size)) { - pr_err("zone_size must be power-of-two\n"); - return -EINVAL; - } - if (g_home_node != NUMA_NO_NODE && 
g_home_node >= nr_online_nodes) { pr_err("invalid home_node value\n"); g_home_node = NUMA_NO_NODE; -- cgit From 979d54475e0b75a28e55528617fecf83b4a221da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:02 +0100 Subject: null_blk: cleanup null_gendisk_register Use a saner size calculation, and do a trivial cleanup on the zone revalidation to prepare to future changes. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 53ba9c7f2786..dd6026289fbf 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1559,14 +1559,14 @@ static int init_driver_queues(struct nullb *nullb) static int null_gendisk_register(struct nullb *nullb) { + sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; struct gendisk *disk; - sector_t size; + int ret; disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); if (!disk) return -ENOMEM; - size = (sector_t)nullb->dev->size * 1024 * 1024ULL; - set_capacity(disk, size >> 9); + set_capacity(disk, size); disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; disk->major = null_major; @@ -1577,9 +1577,8 @@ static int null_gendisk_register(struct nullb *nullb) strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); if (nullb->dev->zoned) { - int ret = blk_revalidate_disk_zones(disk); - - if (ret != 0) + ret = blk_revalidate_disk_zones(disk); + if (ret) return ret; } -- cgit From ae58954d8734c44298f55ed71e683ea944994fab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:07 +0100 Subject: block: don't handle bio based drivers in blk_revalidate_disk_zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bio based drivers only need to update q->nr_zones. 
Do that manually instead of overloading blk_revalidate_disk_zones to keep that function simpler for the next round of changes that will rely even more on the request based functionality. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index dd6026289fbf..068cd0ae6e2c 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1576,11 +1576,17 @@ static int null_gendisk_register(struct nullb *nullb) disk->queue = nullb->q; strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); +#ifdef CONFIG_BLK_DEV_ZONED if (nullb->dev->zoned) { - ret = blk_revalidate_disk_zones(disk); - if (ret) - return ret; + if (queue_is_mq(nullb->q)) { + ret = blk_revalidate_disk_zones(disk); + if (ret) + return ret; + } else { + nullb->q->nr_zones = blkdev_nr_zones(disk); + } } +#endif add_disk(disk); return 0; -- cgit From 6c6b3549142255c3fe4bab5560efdf8391c8d858 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:08 +0100 Subject: block: set the zone size in blk_revalidate_disk_zones atomically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current zone revalidation code has a major problem in that it doesn't update the zone size and q->nr_zones atomically, leading to a short window where an out of bounds access to the zone arrays is possible. To fix this move the setting of the zone size into the critical sections of blk_revalidate_disk_zones so that it gets updated together with the zone bitmaps and q->nr_zones. This also slightly simplifies the caller as it deduces the zone size from the report_zones. This change also allows checking for a power of two zone size in generic code. 
Reported-by: Hans Holmberg Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 068cd0ae6e2c..997b7dc095b9 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1583,6 +1583,8 @@ static int null_gendisk_register(struct nullb *nullb) if (ret) return ret; } else { + blk_queue_chunk_sectors(nullb->q, + nullb->dev->zone_size_sects); nullb->q->nr_zones = blkdev_nr_zones(disk); } } @@ -1746,7 +1748,6 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_blk_queue; - blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects); nullb->q->limits.zoned = BLK_ZONED_HM; blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q); blk_queue_required_elevator_features(nullb->q, -- cgit From f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 26 Nov 2019 16:36:05 +0100 Subject: xen/blkback: Avoid unmapping unmapped grant pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For each I/O request, blkback first maps the foreign pages for the request to its local pages. If an allocation of a local page for the mapping fails, it should unmap every mapping already made for the request. However, blkback's handling mechanism for the allocation failure does not mark the remaining foreign pages as unmapped. Therefore, the unmap function merely tries to unmap every valid grant page for the request, including the pages not mapped due to the allocation failure. On a system that fails the allocation frequently, this problem leads to following kernel crash. 
[ 372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 [ 372.012546] IP: [] gnttab_unmap_refs.part.7+0x1c/0x40 [ 372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0 [ 372.012562] Oops: 0002 [#1] SMP [ 372.012566] Modules linked in: act_police sch_ingress cls_u32 ... [ 372.012746] Call Trace: [ 372.012752] [] gnttab_unmap_refs+0x34/0x40 [ 372.012759] [] xen_blkbk_unmap+0x83/0x150 [xen_blkback] ... [ 372.012802] [] dispatch_rw_block_io+0x970/0x980 [xen_blkback] ... Decompressing Linux... Parsing ELF... done. Booting the kernel. [ 0.000000] Initializing cgroup subsys cpuset This commit fixes this problem by marking the grant pages of the given request that were not mapped due to the allocation failure as invalid. Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings") Reviewed-by: David Woodhouse Reviewed-by: Maximilian Heyne Reviewed-by: Paul Durrant Reviewed-by: Roger Pau Monné Signed-off-by: SeongJae Park Signed-off-by: Jens Axboe --- drivers/block/xen-blkback/blkback.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index fd1e19f1a49f..3666afa639d1 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -936,6 +936,8 @@ next: out_of_memory: pr_alert("%s: out of memory\n", __func__); put_free_pages(ring, pages_to_gnt, segs_to_map); + for (i = last_map; i < num; i++) + pages[i]->handle = BLKBACK_INVALID_HANDLE; return -ENOMEM; } -- cgit From 36582a5a456100ebe4983e3d63b8cbc7e62a0ddc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 4 Dec 2019 19:31:14 +0800 Subject: brd: remove max_hw_sectors queue limit Now we depend on blk_queue_split() to respect most of queue limit (the only one exception could be dma alignment), however blk_queue_split() isn't used for brd, so this limit isn't respected since v4.3. 
Also, the max_hw_sectors limit doesn't play a big role for brd; it has been there, for unknown reasons, since brd was added to the tree. So remove it. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/brd.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c548a5a6c1a0..c2e5b2ad88bc 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -382,7 +382,6 @@ static struct brd_device *brd_alloc(int i) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_max_hw_sectors(brd->brd_queue, 1024); /* This is so fdisk will align partitions on 4k, because of * direct_access API needing 4k alignment, returning a PFN -- cgit From f1acbf2186dfe761a05ce35c0f36246caed44403 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 4 Dec 2019 19:31:15 +0800 Subject: brd: warn on un-aligned buffer Queue dma alignment limit requires users (fs, target, ...) of the block layer to pass aligned buffers. So far brd doesn't support un-aligned buffers, even though it is easy to support them. However, given brd is often used for debug purposes, and there are other drivers which can't support un-aligned buffers either, add a warning so that brd users know what to fix. 
Reported-by: Stephen Rust Cc: Stephen Rust Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/brd.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c2e5b2ad88bc..a8730cc4db10 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -297,6 +297,10 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) unsigned int len = bvec.bv_len; int err; + /* Don't support un-aligned buffer */ + WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) || + (len & (SECTOR_SIZE - 1))); + err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, bio_op(bio), sector); if (err) -- cgit From bca1c43cb2dbe4212aea0793bfd91aeb4c2d184d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Dec 2019 09:17:41 -0700 Subject: null_blk: remove unused variable warning on !CONFIG_BLK_DEV_ZONED If BLK_DEV_ZONED isn't set, 'ret' isn't used. This makes gcc complain, rightfully. Move ret where it is used. Fixes: 979d54475e0b ("null_blk: cleanup null_gendisk_register") Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 997b7dc095b9..ae8d4bc532b0 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1561,7 +1561,6 @@ static int null_gendisk_register(struct nullb *nullb) { sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; struct gendisk *disk; - int ret; disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); if (!disk) @@ -1579,7 +1578,7 @@ static int null_gendisk_register(struct nullb *nullb) #ifdef CONFIG_BLK_DEV_ZONED if (nullb->dev->zoned) { if (queue_is_mq(nullb->q)) { - ret = blk_revalidate_disk_zones(disk); + int ret = blk_revalidate_disk_zones(disk); if (ret) return ret; } else { -- cgit