From 22ada802ede89829dd010a317d9b812b7df7111a Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 21 Sep 2020 22:32:48 -0400
Subject: block: use lcm_not_zero() when stacking chunk_sectors

Like 'io_opt', blk_stack_limits() should stack 'chunk_sectors' using
lcm_not_zero() rather than min_not_zero() -- otherwise the final
'chunk_sectors' could result in sub-optimal alignment of IO to component
devices in the IO stack.

Also, if 'chunk_sectors' isn't a multiple of 'physical_block_size' then
it is a bug in the driver and the device should be flagged as
'misaligned'.

Reviewed-by: Ming Lei
Reviewed-by: Martin K. Petersen
Signed-off-by: Mike Snitzer
Signed-off-by: Jens Axboe
---
 block/blk-settings.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 76a7e03bcd6c..b2e1a929a6db 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -534,6 +534,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	t->io_min = max(t->io_min, b->io_min);
 	t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
+	t->chunk_sectors = lcm_not_zero(t->chunk_sectors, b->chunk_sectors);
 
 	/* Physical block size a multiple of the logical block size? */
 	if (t->physical_block_size & (t->logical_block_size - 1)) {
@@ -556,6 +557,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		ret = -1;
 	}
 
+	/* chunk_sectors a multiple of the physical block size? */
+	if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
+		t->chunk_sectors = 0;
+		t->misaligned = 1;
+		ret = -1;
+	}
+
 	t->raid_partial_stripes_expensive =
 		max(t->raid_partial_stripes_expensive,
 		    b->raid_partial_stripes_expensive);
@@ -594,10 +602,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			t->discard_granularity;
 	}
 
-	if (b->chunk_sectors)
-		t->chunk_sectors = min_not_zero(t->chunk_sectors,
-						b->chunk_sectors);
-
 	t->zoned = max(t->zoned, b->zoned);
 	return ret;
 }
-- cgit
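The practical effect of stacking chunk_sectors with lcm_not_zero() instead of min_not_zero(), and of the new physical-block-size check, can be reproduced with plain arithmetic. The following is a minimal userspace C sketch, not kernel code: gcd(), lcm_not_zero() and min_not_zero() are small stand-ins for the kernel helpers of the same names, and the chunk and block sizes are made-up example values.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's gcd()/lcm_not_zero()/min_not_zero(). */
static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

static unsigned int lcm_not_zero(unsigned int a, unsigned int b)
{
	if (!a)
		return b;
	if (!b)
		return a;
	return (a / gcd(a, b)) * b;
}

static unsigned int min_not_zero(unsigned int a, unsigned int b)
{
	if (!a)
		return b;
	if (!b)
		return a;
	return a < b ? a : b;
}

int main(void)
{
	/* Made-up chunk sizes of two stacked devices, in 512-byte sectors. */
	unsigned int top = 256;			/* 128K chunks */
	unsigned int bottom = 320;		/* 160K chunks */
	unsigned int physical_block_size = 4096;	/* bytes */

	/* Old stacking: the smaller chunk wins and need not align to both. */
	printf("min_not_zero: %u sectors\n", min_not_zero(top, bottom));

	/* New stacking: the least common multiple aligns to both devices. */
	unsigned int chunk = lcm_not_zero(top, bottom);
	printf("lcm_not_zero: %u sectors\n", chunk);

	/*
	 * The added sanity check: chunk_sectors, converted to bytes, must be
	 * a multiple of the (power-of-2) physical block size, otherwise the
	 * stacked device is flagged as misaligned.
	 */
	if ((chunk << 9) & (physical_block_size - 1))
		printf("misaligned chunk_sectors\n");
	else
		printf("chunk aligns to %u-byte physical blocks\n", physical_block_size);
	return 0;
}

Stacking 256-sector (128K) chunks on 320-sector (160K) chunks, min_not_zero() reports 256, which is not a multiple of the 320-sector component, so chunk-aligned IO to the top device can still straddle chunks of the bottom one; lcm_not_zero() reports 1280 sectors (640K), a boundary both components share.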
From 07d098e6bbad04030dab5b3e64149601fcb063ce Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 21 Sep 2020 22:32:49 -0400
Subject: block: allow 'chunk_sectors' to be non-power-of-2

It is possible, albeit unlikely, for a block device to have a
non-power-of-2 'chunk_sectors' (e.g. a 10+2 RAID6 with 128K chunk_sectors
has a full-stripe size of 1280K; this causes the RAID6's io_opt to be
advertised as 1280K, and a stacked device _could_ then be made to use a
blocksize, aka chunk_sectors, that matches the non-power-of-2 io_opt of
the underlying RAID6 -- resulting in the stacked device's chunk_sectors
being a non-power-of-2).

Update blk_queue_chunk_sectors() and blk_max_size_offset() to
accommodate drivers that need a non-power-of-2 chunk_sectors.

Reviewed-by: Ming Lei
Reviewed-by: Martin K. Petersen
Signed-off-by: Mike Snitzer
Signed-off-by: Jens Axboe
---
 block/blk-settings.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index b2e1a929a6db..5ea3de48afba 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -172,15 +172,13 @@ EXPORT_SYMBOL(blk_queue_max_hw_sectors);
  *
  * Description:
  *    If a driver doesn't want IOs to cross a given chunk size, it can set
- *    this limit and prevent merging across chunks. Note that the chunk size
- *    must currently be a power-of-2 in sectors. Also note that the block
- *    layer must accept a page worth of data at any offset. So if the
- *    crossing of chunks is a hard limitation in the driver, it must still be
- *    prepared to split single page bios.
+ *    this limit and prevent merging across chunks. Note that the block layer
+ *    must accept a page worth of data at any offset. So if the crossing of
+ *    chunks is a hard limitation in the driver, it must still be prepared
+ *    to split single page bios.
  **/
 void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
 {
-	BUG_ON(!is_power_of_2(chunk_sectors));
 	q->limits.chunk_sectors = chunk_sectors;
 }
 EXPORT_SYMBOL(blk_queue_chunk_sectors);
-- cgit

From c2e4cd57cfa1f627b786c764d185fff85fd12be9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 24 Sep 2020 08:51:34 +0200
Subject: block: lift setting the readahead size into the block layer

Drivers shouldn't really mess with the readahead size, as that is a VM
concept. Instead set it based on the optimal I/O size by lifting the
algorithm from the md driver when registering the disk. Also set
bdi->io_pages there by applying the same scheme based on max_sectors.
To ensure the limits work well for stacking drivers, a new helper is
added to update the readahead limits from the block limits, which is
also called from disk_stack_limits().

Signed-off-by: Christoph Hellwig
Reviewed-by: Johannes Thumshirn
Reviewed-by: Jan Kara
Reviewed-by: Mike Snitzer
Reviewed-by: Martin K. Petersen
Acked-by: Coly Li
Signed-off-by: Jens Axboe
---
 block/blk-settings.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5ea3de48afba..4f6eb4bb1723 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -372,6 +372,19 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
 }
 EXPORT_SYMBOL(blk_queue_alignment_offset);
 
+void blk_queue_update_readahead(struct request_queue *q)
+{
+	/*
+	 * For read-ahead of large files to be effective, we need to read ahead
+	 * at least twice the optimal I/O size.
+	 */
+	q->backing_dev_info->ra_pages =
+		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
+	q->backing_dev_info->io_pages =
+		queue_max_sectors(q) >> (PAGE_SHIFT - 9);
+}
+EXPORT_SYMBOL_GPL(blk_queue_update_readahead);
+
 /**
  * blk_limits_io_min - set minimum request size for a device
  * @limits: the queue limits
@@ -450,6 +463,8 @@ EXPORT_SYMBOL(blk_limits_io_opt);
 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 {
 	blk_limits_io_opt(&q->limits, opt);
+	q->backing_dev_info->ra_pages =
+		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
@@ -631,8 +646,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 			       top, bottom);
 	}
 
-	t->backing_dev_info->io_pages =
-		t->limits.max_sectors >> (PAGE_SHIFT - 9);
+	blk_queue_update_readahead(disk->queue);
 }
 EXPORT_SYMBOL(disk_stack_limits);
-- cgit
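The second patch's change to blk_max_size_offset() lives in include/linux/blkdev.h and is not part of the blk-settings.c diff above. As a rough illustration of why a non-power-of-2 chunk_sectors needs different arithmetic there, the userspace C sketch below shows how far an I/O starting at a given offset may extend before crossing the next chunk boundary; max_io_sectors_at() is a hypothetical name, not a kernel function, and the RAID6 numbers come from the commit message's example.

#include <stdio.h>
#include <stdint.h>

/*
 * Hypothetical illustration of the idea behind blk_max_size_offset():
 * an I/O starting at 'offset' must not cross the next chunk boundary,
 * so the largest allowed size is the distance to that boundary, further
 * capped by max_sectors. A power-of-2 chunk allows a cheap bitmask; a
 * non-power-of-2 chunk needs a remainder.
 */
static unsigned int max_io_sectors_at(uint64_t offset, unsigned int chunk_sectors,
				      unsigned int max_sectors)
{
	unsigned int left;

	if (!chunk_sectors)
		return max_sectors;

	if ((chunk_sectors & (chunk_sectors - 1)) == 0)	/* power of 2 */
		left = chunk_sectors - (offset & (chunk_sectors - 1));
	else
		left = chunk_sectors - (offset % chunk_sectors);

	return left < max_sectors ? left : max_sectors;
}

int main(void)
{
	/* 1280K full stripe of a 10+2 RAID6 with 128K chunks = 2560 sectors. */
	unsigned int chunk = 2560, max_sectors = 1024;

	printf("%u\n", max_io_sectors_at(0, chunk, max_sectors));    /* 1024 */
	printf("%u\n", max_io_sectors_at(2048, chunk, max_sectors)); /*  512: stop at the stripe boundary */
	printf("%u\n", max_io_sectors_at(2560, chunk, max_sectors)); /* 1024: a new stripe begins */
	return 0;
}

With the old BUG_ON(!is_power_of_2(chunk_sectors)) in place, a driver could not even advertise the 2560-sector chunk; after this patch it can, with the boundary math handled along the lines sketched here.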
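The readahead helper added in the third patch is plain arithmetic over the queue limits. The sketch below redoes that arithmetic in userspace with hypothetical inputs (4K pages, the 1280K io_opt from the RAID6 example above, 512K max_sectors); the locally defined VM_READAHEAD_PAGES mirrors the kernel's default of 128K worth of pages.

#include <stdio.h>

#define PAGE_SIZE          4096UL
#define PAGE_SHIFT         12
#define VM_READAHEAD_PAGES (128UL * 1024 / PAGE_SIZE)	/* 32 pages with 4K pages */

int main(void)
{
	unsigned long io_opt = 1280 * 1024UL;	/* optimal I/O size in bytes */
	unsigned long max_sectors = 1024;	/* queue max_sectors in 512b units (512K) */

	/* Read ahead at least twice the optimal I/O size, never below the VM default. */
	unsigned long ra_pages = io_opt * 2 / PAGE_SIZE;
	if (ra_pages < VM_READAHEAD_PAGES)
		ra_pages = VM_READAHEAD_PAGES;

	/* io_pages mirrors max_sectors, converted from 512b sectors to pages. */
	unsigned long io_pages = max_sectors >> (PAGE_SHIFT - 9);

	printf("ra_pages = %lu (%lu KiB)\n", ra_pages, ra_pages * PAGE_SIZE / 1024);
	printf("io_pages = %lu (%lu KiB)\n", io_pages, io_pages * PAGE_SIZE / 1024);
	return 0;
}

For this example device the formula yields ra_pages = 640 (2560K of readahead, twice the 1280K stripe) and io_pages = 128; a device with a small or unset io_opt simply keeps the VM_READAHEAD_PAGES default.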