Diffstat (limited to 'block/blk-settings.c')
-rw-r--r--  block/blk-settings.c | 1064
1 file changed, 442 insertions(+), 622 deletions(-)
diff --git a/block/blk-settings.c b/block/blk-settings.c index d2731843f2fc..a000daafbfb4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -6,7 +6,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/bio.h> -#include <linux/blkdev.h> +#include <linux/blk-integrity.h> #include <linux/pagemap.h> #include <linux/backing-dev-defs.h> #include <linux/gcd.h> @@ -21,7 +21,7 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) { - q->rq_timeout = timeout; + WRITE_ONCE(q->rq_timeout, timeout); } EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); @@ -50,25 +50,31 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_sectors = UINT_MAX; lim->max_dev_sectors = UINT_MAX; lim->max_write_zeroes_sectors = UINT_MAX; - lim->max_zone_append_sectors = UINT_MAX; + lim->max_hw_zone_append_sectors = UINT_MAX; lim->max_user_discard_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); -static void blk_apply_bdi_limits(struct backing_dev_info *bdi, +void blk_apply_bdi_limits(struct backing_dev_info *bdi, struct queue_limits *lim) { /* * For read-ahead of large files to be effective, we need to read ahead * at least twice the optimal I/O size. + * + * There is no hardware limitation for the read-ahead size and the user + * might have increased the read-ahead size through sysfs, so don't ever + * decrease it. */ - bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + bdi->ra_pages = max3(bdi->ra_pages, + lim->io_opt * 2 / PAGE_SIZE, + VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } static int blk_validate_zoned_limits(struct queue_limits *lim) { - if (!lim->zoned) { + if (!(lim->features & BLK_FEAT_ZONED)) { if (WARN_ON_ONCE(lim->max_open_zones) || WARN_ON_ONCE(lim->max_active_zones) || WARN_ON_ONCE(lim->zone_write_granularity) || @@ -80,30 +86,175 @@ static int blk_validate_zoned_limits(struct queue_limits *lim) if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED))) return -EINVAL; + /* + * Given that active zones include open zones, the maximum number of + * open zones cannot be larger than the maximum number of active zones. + */ + if (lim->max_active_zones && + lim->max_open_zones > lim->max_active_zones) + return -EINVAL; + if (lim->zone_write_granularity < lim->logical_block_size) lim->zone_write_granularity = lim->logical_block_size; - if (lim->max_zone_append_sectors) { + /* + * The Zone Append size is limited by the maximum I/O size and the zone + * size given that it can't span zones. + * + * If no max_hw_zone_append_sectors limit is provided, the block layer + * will emulated it, else we're also bound by the hardware limit. 
+ */ + lim->max_zone_append_sectors = + min_not_zero(lim->max_hw_zone_append_sectors, + min(lim->chunk_sectors, lim->max_hw_sectors)); + return 0; +} + +static int blk_validate_integrity_limits(struct queue_limits *lim) +{ + struct blk_integrity *bi = &lim->integrity; + + if (!bi->tuple_size) { + if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE || + bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) { + pr_warn("invalid PI settings.\n"); + return -EINVAL; + } + bi->flags |= BLK_INTEGRITY_NOGENERATE | BLK_INTEGRITY_NOVERIFY; + return 0; + } + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { + pr_warn("integrity support disabled.\n"); + return -EINVAL; + } + + if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE && + (bi->flags & BLK_INTEGRITY_REF_TAG)) { + pr_warn("ref tag not support without checksum.\n"); + return -EINVAL; + } + + if (!bi->interval_exp) + bi->interval_exp = ilog2(lim->logical_block_size); + + return 0; +} + +/* + * Returns max guaranteed bytes which we can fit in a bio. + * + * We request that an atomic_write is ITER_UBUF iov_iter (so a single vector), + * so we assume that we can fit in at least PAGE_SIZE in a segment, apart from + * the first and last segments. + */ +static unsigned int blk_queue_max_guaranteed_bio(struct queue_limits *lim) +{ + unsigned int max_segments = min(BIO_MAX_VECS, lim->max_segments); + unsigned int length; + + length = min(max_segments, 2) * lim->logical_block_size; + if (max_segments > 2) + length += (max_segments - 2) * PAGE_SIZE; + + return length; +} + +static void blk_atomic_writes_update_limits(struct queue_limits *lim) +{ + unsigned int unit_limit = min(lim->max_hw_sectors << SECTOR_SHIFT, + blk_queue_max_guaranteed_bio(lim)); + + unit_limit = rounddown_pow_of_two(unit_limit); + + lim->atomic_write_max_sectors = + min(lim->atomic_write_hw_max >> SECTOR_SHIFT, + lim->max_hw_sectors); + lim->atomic_write_unit_min = + min(lim->atomic_write_hw_unit_min, unit_limit); + lim->atomic_write_unit_max = + min(lim->atomic_write_hw_unit_max, unit_limit); + lim->atomic_write_boundary_sectors = + lim->atomic_write_hw_boundary >> SECTOR_SHIFT; +} + +static void blk_validate_atomic_write_limits(struct queue_limits *lim) +{ + unsigned int boundary_sectors; + + if (!(lim->features & BLK_FEAT_ATOMIC_WRITES)) + goto unsupported; + + if (!lim->atomic_write_hw_max) + goto unsupported; + + if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_min))) + goto unsupported; + + if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_max))) + goto unsupported; + + if (WARN_ON_ONCE(lim->atomic_write_hw_unit_min > + lim->atomic_write_hw_unit_max)) + goto unsupported; + + if (WARN_ON_ONCE(lim->atomic_write_hw_unit_max > + lim->atomic_write_hw_max)) + goto unsupported; + + boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT; + + if (boundary_sectors) { + if (WARN_ON_ONCE(lim->atomic_write_hw_max > + lim->atomic_write_hw_boundary)) + goto unsupported; /* - * The Zone Append size is limited by the maximum I/O size - * and the zone size given that it can't span zones. + * A feature of boundary support is that it disallows bios to + * be merged which would result in a merged request which + * crosses either a chunk sector or atomic write HW boundary, + * even though chunk sectors may be just set for performance. + * For simplicity, disallow atomic writes for a chunk sector + * which is non-zero and smaller than atomic write HW boundary. + * Furthermore, chunk sectors must be a multiple of atomic + * write HW boundary. 
Otherwise boundary support becomes + * complicated. + * Devices which do not conform to these rules can be dealt + * with if and when they show up. */ - lim->max_zone_append_sectors = - min3(lim->max_hw_sectors, - lim->max_zone_append_sectors, - lim->chunk_sectors); + if (WARN_ON_ONCE(lim->chunk_sectors % boundary_sectors)) + goto unsupported; + + /* + * The boundary size just needs to be a multiple of unit_max + * (and not necessarily a power-of-2), so this following check + * could be relaxed in future. + * Furthermore, if needed, unit_max could even be reduced so + * that it is compliant with a !power-of-2 boundary. + */ + if (!is_power_of_2(boundary_sectors)) + goto unsupported; } - return 0; + blk_atomic_writes_update_limits(lim); + return; + +unsupported: + lim->atomic_write_max_sectors = 0; + lim->atomic_write_boundary_sectors = 0; + lim->atomic_write_unit_min = 0; + lim->atomic_write_unit_max = 0; } /* * Check that the limits in lim are valid, initialize defaults for unset * values, and cap values based on others where needed. */ -static int blk_validate_limits(struct queue_limits *lim) +int blk_validate_limits(struct queue_limits *lim) { unsigned int max_hw_sectors; + unsigned int logical_block_sectors; + unsigned long seg_size; + int err; /* * Unless otherwise specified, default to 512 byte logical blocks and a @@ -111,6 +262,10 @@ static int blk_validate_limits(struct queue_limits *lim) */ if (!lim->logical_block_size) lim->logical_block_size = SECTOR_SIZE; + else if (blk_validate_block_size(lim->logical_block_size)) { + pr_warn("Invalid logical block size (%d)\n", lim->logical_block_size); + return -EINVAL; + } if (lim->physical_block_size < lim->logical_block_size) lim->physical_block_size = lim->logical_block_size; @@ -122,6 +277,13 @@ static int blk_validate_limits(struct queue_limits *lim) lim->io_min = lim->physical_block_size; /* + * The optimal I/O size may not be aligned to physical block size + * (because it may be limited by dma engines which have no clue about + * block size of the disks attached to them), so we round it down here. + */ + lim->io_opt = round_down(lim->io_opt, lim->physical_block_size); + + /* * max_hw_sectors has a somewhat weird default for historical reason, * but driver really should set their own instead of relying on this * value. 
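
The atomic-write validation above derives atomic_write_unit_max from two inputs: the largest I/O the hardware accepts and the number of bytes a single bio is guaranteed to hold, rounded down to a power of two. The following standalone sketch reproduces that arithmetic in plain C; it is not kernel code, and the constants and example limits (128 segments, 512-byte logical blocks, a 64 KiB hardware unit) are assumed purely for illustration — the real code operates on struct queue_limits.

/* Standalone sketch of the atomic-write unit clamping shown above. */
#include <stdio.h>

#define PAGE_SIZE     4096u
#define BIO_MAX_VECS  256u
#define SECTOR_SHIFT  9u

static unsigned int rounddown_pow_of_two(unsigned int x)
{
	unsigned int p = 1;

	while (p <= x / 2)
		p <<= 1;
	return p;
}

/* First and last segment at logical-block granularity, a page for the rest. */
static unsigned int max_guaranteed_bio(unsigned int max_segments,
				       unsigned int logical_block_size)
{
	unsigned int segs = max_segments < BIO_MAX_VECS ? max_segments : BIO_MAX_VECS;
	unsigned int len = (segs < 2 ? segs : 2) * logical_block_size;

	if (segs > 2)
		len += (segs - 2) * PAGE_SIZE;
	return len;
}

int main(void)
{
	unsigned int max_hw_sectors = 2048;	/* 1 MiB */
	unsigned int lbs = 512;
	unsigned int hw_unit_max = 65536;	/* 64 KiB advertised by hardware */
	unsigned int unit_limit = max_hw_sectors << SECTOR_SHIFT;
	unsigned int guaranteed = max_guaranteed_bio(128, lbs);

	if (guaranteed < unit_limit)
		unit_limit = guaranteed;
	unit_limit = rounddown_pow_of_two(unit_limit);

	printf("atomic_write_unit_max <= %u bytes\n",
	       hw_unit_max < unit_limit ? hw_unit_max : unit_limit);
	return 0;
}

With 128 segments of 512-byte logical blocks the guaranteed capacity is 2 * 512 + 126 * 4096 = 517120 bytes, which rounds down to 256 KiB, so the 64 KiB hardware unit remains the effective limit.
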
@@ -134,8 +296,11 @@ static int blk_validate_limits(struct queue_limits *lim) lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS)) return -EINVAL; + logical_block_sectors = lim->logical_block_size >> SECTOR_SHIFT; + if (WARN_ON_ONCE(logical_block_sectors > lim->max_hw_sectors)) + return -EINVAL; lim->max_hw_sectors = round_down(lim->max_hw_sectors, - lim->logical_block_size >> SECTOR_SHIFT); + logical_block_sectors); /* * The actual max_sectors value is a complex beast and also takes the @@ -146,14 +311,20 @@ static int blk_validate_limits(struct queue_limits *lim) max_hw_sectors = min_not_zero(lim->max_hw_sectors, lim->max_dev_sectors); if (lim->max_user_sectors) { - if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) + if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE) return -EINVAL; lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); + } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) { + lim->max_sectors = + min(max_hw_sectors, lim->io_opt >> SECTOR_SHIFT); + } else if (lim->io_min > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) { + lim->max_sectors = + min(max_hw_sectors, lim->io_min >> SECTOR_SHIFT); } else { lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP); } lim->max_sectors = round_down(lim->max_sectors, - lim->logical_block_size >> SECTOR_SHIFT); + logical_block_sectors); /* * Random default for the maximum number of segments. Driver should not @@ -178,7 +349,7 @@ static int blk_validate_limits(struct queue_limits *lim) */ if (!lim->seg_boundary_mask) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; - if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1)) + if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1)) return -EINVAL; /* @@ -188,7 +359,10 @@ static int blk_validate_limits(struct queue_limits *lim) * bvec and lower layer bio splitting is supposed to handle the two * correctly. */ - if (!lim->virt_boundary_mask) { + if (lim->virt_boundary_mask) { + if (!lim->max_segment_size) + lim->max_segment_size = UINT_MAX; + } else { /* * The maximum segment size has an odd historic 64k default that * drivers probably should override. Just like the I/O size we @@ -196,10 +370,17 @@ static int blk_validate_limits(struct queue_limits *lim) */ if (!lim->max_segment_size) lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; - if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) + if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE)) return -EINVAL; } + /* setup min segment size for building new segment in fast path */ + if (lim->seg_boundary_mask > lim->max_segment_size - 1) + seg_size = lim->max_segment_size; + else + seg_size = lim->seg_boundary_mask + 1; + lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE); + /* * We require drivers to at least do logical block aligned I/O, but * historically could not check for that due to the separate calls @@ -213,11 +394,20 @@ static int blk_validate_limits(struct queue_limits *lim) if (lim->alignment_offset) { lim->alignment_offset &= (lim->physical_block_size - 1); - lim->misaligned = 0; + lim->flags &= ~BLK_FLAG_MISALIGNED; } + if (!(lim->features & BLK_FEAT_WRITE_CACHE)) + lim->features &= ~BLK_FEAT_FUA; + + blk_validate_atomic_write_limits(lim); + + err = blk_validate_integrity_limits(lim); + if (err) + return err; return blk_validate_zoned_limits(lim); } +EXPORT_SYMBOL_GPL(blk_validate_limits); /* * Set the default limits for a newly allocated queue. 
@lim contains the @@ -241,27 +431,63 @@ int blk_set_default_limits(struct queue_limits *lim) * @lim: limits to apply * * Apply the limits in @lim that were obtained from queue_limits_start_update() - * and updated by the caller to @q. + * and updated by the caller to @q. The caller must have frozen the queue or + * ensure that there are no outstanding I/Os by other means. * * Returns 0 if successful, else a negative error code. */ int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim) - __releases(q->limits_lock) { - int error = blk_validate_limits(lim); + int error; + + error = blk_validate_limits(lim); + if (error) + goto out_unlock; - if (!error) { - q->limits = *lim; - if (q->disk) - blk_apply_bdi_limits(q->disk->bdi, lim); +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + if (q->crypto_profile && lim->integrity.tag_size) { + pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together.\n"); + error = -EINVAL; + goto out_unlock; } +#endif + + q->limits = *lim; + if (q->disk) + blk_apply_bdi_limits(q->disk->bdi, lim); +out_unlock: mutex_unlock(&q->limits_lock); return error; } EXPORT_SYMBOL_GPL(queue_limits_commit_update); /** + * queue_limits_commit_update_frozen - commit an atomic update of queue limits + * @q: queue to update + * @lim: limits to apply + * + * Apply the limits in @lim that were obtained from queue_limits_start_update() + * and updated with the new values by the caller to @q. Freezes the queue + * before the update and unfreezes it after. + * + * Returns 0 if successful, else a negative error code. + */ +int queue_limits_commit_update_frozen(struct request_queue *q, + struct queue_limits *lim) +{ + unsigned int memflags; + int ret; + + memflags = blk_mq_freeze_queue(q); + ret = queue_limits_commit_update(q, lim); + blk_mq_unfreeze_queue(q, memflags); + + return ret; +} +EXPORT_SYMBOL_GPL(queue_limits_commit_update_frozen); + +/** * queue_limits_set - apply queue limits to queue * @q: queue to update * @lim: limits to apply @@ -279,446 +505,159 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim) } EXPORT_SYMBOL_GPL(queue_limits_set); -/** - * blk_queue_bounce_limit - set bounce buffer limit for queue - * @q: the request queue for the device - * @bounce: bounce limit to enforce - * - * Description: - * Force bouncing for ISA DMA ranges or highmem. - * - * DEPRECATED, don't use in new code. - **/ -void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) +static int queue_limit_alignment_offset(const struct queue_limits *lim, + sector_t sector) { - q->limits.bounce = bounce; + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) + << SECTOR_SHIFT; + + return (granularity + lim->alignment_offset - alignment) % granularity; } -EXPORT_SYMBOL(blk_queue_bounce_limit); -/** - * blk_queue_max_hw_sectors - set max sectors for a request for this queue - * @q: the request queue for the device - * @max_hw_sectors: max hardware sectors in the usual 512b unit - * - * Description: - * Enables a low level driver to set a hard upper limit, - * max_hw_sectors, on the size of requests. max_hw_sectors is set by - * the device driver based upon the capabilities of the I/O - * controller. - * - * max_dev_sectors is a hard limit imposed by the storage device for - * READ/WRITE requests. It is set by the disk driver. - * - * max_sectors is a soft limit imposed by the block layer for - * filesystem type requests. 
This value can be overridden on a - * per-device basis in /sys/block/<device>/queue/max_sectors_kb. - * The soft limit can not exceed max_hw_sectors. - **/ -void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) +static unsigned int queue_limit_discard_alignment( + const struct queue_limits *lim, sector_t sector) { - struct queue_limits *limits = &q->limits; - unsigned int max_sectors; - - if ((max_hw_sectors << 9) < PAGE_SIZE) { - max_hw_sectors = 1 << (PAGE_SHIFT - 9); - pr_info("%s: set to minimum %u\n", __func__, max_hw_sectors); - } + unsigned int alignment, granularity, offset; - max_hw_sectors = round_down(max_hw_sectors, - limits->logical_block_size >> SECTOR_SHIFT); - limits->max_hw_sectors = max_hw_sectors; + if (!lim->max_discard_sectors) + return 0; - max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); + /* Why are these in bytes, not sectors? */ + alignment = lim->discard_alignment >> SECTOR_SHIFT; + granularity = lim->discard_granularity >> SECTOR_SHIFT; - if (limits->max_user_sectors) - max_sectors = min(max_sectors, limits->max_user_sectors); - else - max_sectors = min(max_sectors, BLK_DEF_MAX_SECTORS_CAP); + /* Offset of the partition start in 'granularity' sectors */ + offset = sector_div(sector, granularity); - max_sectors = round_down(max_sectors, - limits->logical_block_size >> SECTOR_SHIFT); - limits->max_sectors = max_sectors; + /* And why do we do this modulus *again* in blkdev_issue_discard()? */ + offset = (granularity + alignment - offset) % granularity; - if (!q->disk) - return; - q->disk->bdi->io_pages = max_sectors >> (PAGE_SHIFT - 9); + /* Turn it back into bytes, gaah */ + return offset << SECTOR_SHIFT; } -EXPORT_SYMBOL(blk_queue_max_hw_sectors); -/** - * blk_queue_chunk_sectors - set size of the chunk for this queue - * @q: the request queue for the device - * @chunk_sectors: chunk sectors in the usual 512b unit - * - * Description: - * If a driver doesn't want IOs to cross a given chunk size, it can set - * this limit and prevent merging across chunks. Note that the block layer - * must accept a page worth of data at any offset. So if the crossing of - * chunks is a hard limitation in the driver, it must still be prepared - * to split single page bios. - **/ -void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors) +static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs) { - q->limits.chunk_sectors = chunk_sectors; + sectors = round_down(sectors, lbs >> SECTOR_SHIFT); + if (sectors < PAGE_SIZE >> SECTOR_SHIFT) + sectors = PAGE_SIZE >> SECTOR_SHIFT; + return sectors; } -EXPORT_SYMBOL(blk_queue_chunk_sectors); -/** - * blk_queue_max_discard_sectors - set max sectors for a single discard - * @q: the request queue for the device - * @max_discard_sectors: maximum number of sectors to discard - **/ -void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors) +/* Check if second and later bottom devices are compliant */ +static bool blk_stack_atomic_writes_tail(struct queue_limits *t, + struct queue_limits *b) { - struct queue_limits *lim = &q->limits; + /* We're not going to support different boundary sizes.. 
yet */ + if (t->atomic_write_hw_boundary != b->atomic_write_hw_boundary) + return false; - lim->max_hw_discard_sectors = max_discard_sectors; - lim->max_discard_sectors = - min(max_discard_sectors, lim->max_user_discard_sectors); -} -EXPORT_SYMBOL(blk_queue_max_discard_sectors); + /* Can't support this */ + if (t->atomic_write_hw_unit_min > b->atomic_write_hw_unit_max) + return false; -/** - * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase - * @q: the request queue for the device - * @max_sectors: maximum number of sectors to secure_erase - **/ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors) -{ - q->limits.max_secure_erase_sectors = max_sectors; -} -EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors); + /* Or this */ + if (t->atomic_write_hw_unit_max < b->atomic_write_hw_unit_min) + return false; -/** - * blk_queue_max_write_zeroes_sectors - set max sectors for a single - * write zeroes - * @q: the request queue for the device - * @max_write_zeroes_sectors: maximum number of sectors to write per command - **/ -void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_zeroes_sectors) -{ - q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors; + t->atomic_write_hw_max = min(t->atomic_write_hw_max, + b->atomic_write_hw_max); + t->atomic_write_hw_unit_min = max(t->atomic_write_hw_unit_min, + b->atomic_write_hw_unit_min); + t->atomic_write_hw_unit_max = min(t->atomic_write_hw_unit_max, + b->atomic_write_hw_unit_max); + return true; } -EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors); -/** - * blk_queue_max_zone_append_sectors - set max sectors for a single zone append - * @q: the request queue for the device - * @max_zone_append_sectors: maximum number of sectors to write per command - **/ -void blk_queue_max_zone_append_sectors(struct request_queue *q, - unsigned int max_zone_append_sectors) +/* Check for valid boundary of first bottom device */ +static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t, + struct queue_limits *b) { - unsigned int max_sectors; - - if (WARN_ON(!blk_queue_is_zoned(q))) - return; - - max_sectors = min(q->limits.max_hw_sectors, max_zone_append_sectors); - max_sectors = min(q->limits.chunk_sectors, max_sectors); - /* - * Signal eventual driver bugs resulting in the max_zone_append sectors limit - * being 0 due to a 0 argument, the chunk_sectors limit (zone size) not set, - * or the max_hw_sectors limit not set. + * Ensure atomic write boundary is aligned with chunk sectors. Stacked + * devices store chunk sectors in t->io_min. */ - WARN_ON(!max_sectors); + if (b->atomic_write_hw_boundary > t->io_min && + b->atomic_write_hw_boundary % t->io_min) + return false; + if (t->io_min > b->atomic_write_hw_boundary && + t->io_min % b->atomic_write_hw_boundary) + return false; - q->limits.max_zone_append_sectors = max_sectors; + t->atomic_write_hw_boundary = b->atomic_write_hw_boundary; + return true; } -EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors); -/** - * blk_queue_max_segments - set max hw segments for a request for this queue - * @q: the request queue for the device - * @max_segments: max number of segments - * - * Description: - * Enables a low level driver to set an upper limit on the number of - * hw data segments in a request. 
- **/ -void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) -{ - if (!max_segments) { - max_segments = 1; - pr_info("%s: set to minimum %u\n", __func__, max_segments); - } - - q->limits.max_segments = max_segments; -} -EXPORT_SYMBOL(blk_queue_max_segments); -/** - * blk_queue_max_discard_segments - set max segments for discard requests - * @q: the request queue for the device - * @max_segments: max number of segments - * - * Description: - * Enables a low level driver to set an upper limit on the number of - * segments in a discard request. - **/ -void blk_queue_max_discard_segments(struct request_queue *q, - unsigned short max_segments) +/* Check stacking of first bottom device */ +static bool blk_stack_atomic_writes_head(struct queue_limits *t, + struct queue_limits *b) { - q->limits.max_discard_segments = max_segments; -} -EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments); + if (b->atomic_write_hw_boundary && + !blk_stack_atomic_writes_boundary_head(t, b)) + return false; -/** - * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg - * @q: the request queue for the device - * @max_size: max size of segment in bytes - * - * Description: - * Enables a low level driver to set an upper limit on the size of a - * coalesced segment - **/ -void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) -{ - if (max_size < PAGE_SIZE) { - max_size = PAGE_SIZE; - pr_info("%s: set to minimum %u\n", __func__, max_size); + if (t->io_min <= SECTOR_SIZE) { + /* No chunk sectors, so use bottom device values directly */ + t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max; + t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min; + t->atomic_write_hw_max = b->atomic_write_hw_max; + return true; } - /* see blk_queue_virt_boundary() for the explanation */ - WARN_ON_ONCE(q->limits.virt_boundary_mask); - - q->limits.max_segment_size = max_size; -} -EXPORT_SYMBOL(blk_queue_max_segment_size); - -/** - * blk_queue_logical_block_size - set logical block size for the queue - * @q: the request queue for the device - * @size: the logical block size, in bytes - * - * Description: - * This should be set to the lowest possible block size that the - * storage device can address. The default of 512 covers most - * hardware. - **/ -void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) -{ - struct queue_limits *limits = &q->limits; - - limits->logical_block_size = size; - - if (limits->discard_granularity < limits->logical_block_size) - limits->discard_granularity = limits->logical_block_size; - - if (limits->physical_block_size < size) - limits->physical_block_size = size; - - if (limits->io_min < limits->physical_block_size) - limits->io_min = limits->physical_block_size; - - limits->max_hw_sectors = - round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT); - limits->max_sectors = - round_down(limits->max_sectors, size >> SECTOR_SHIFT); -} -EXPORT_SYMBOL(blk_queue_logical_block_size); - -/** - * blk_queue_physical_block_size - set physical block size for the queue - * @q: the request queue for the device - * @size: the physical block size, in bytes - * - * Description: - * This should be set to the lowest possible sector size that the - * hardware can operate on without reverting to read-modify-write - * operations. 
- */ -void blk_queue_physical_block_size(struct request_queue *q, unsigned int size) -{ - q->limits.physical_block_size = size; - - if (q->limits.physical_block_size < q->limits.logical_block_size) - q->limits.physical_block_size = q->limits.logical_block_size; - - if (q->limits.discard_granularity < q->limits.physical_block_size) - q->limits.discard_granularity = q->limits.physical_block_size; - - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; -} -EXPORT_SYMBOL(blk_queue_physical_block_size); - -/** - * blk_queue_zone_write_granularity - set zone write granularity for the queue - * @q: the request queue for the zoned device - * @size: the zone write granularity size, in bytes - * - * Description: - * This should be set to the lowest possible size allowing to write in - * sequential zones of a zoned block device. - */ -void blk_queue_zone_write_granularity(struct request_queue *q, - unsigned int size) -{ - if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) - return; - - q->limits.zone_write_granularity = size; - - if (q->limits.zone_write_granularity < q->limits.logical_block_size) - q->limits.zone_write_granularity = q->limits.logical_block_size; -} -EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity); + /* + * Find values for limits which work for chunk size. + * b->atomic_write_hw_unit_{min, max} may not be aligned with chunk + * size (t->io_min), as chunk size is not restricted to a power-of-2. + * So we need to find highest power-of-2 which works for the chunk + * size. + * As an example scenario, we could have b->unit_max = 16K and + * t->io_min = 24K. For this case, reduce t->unit_max to a value + * aligned with both limits, i.e. 8K in this example. + */ + t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max; + while (t->io_min % t->atomic_write_hw_unit_max) + t->atomic_write_hw_unit_max /= 2; -/** - * blk_queue_alignment_offset - set physical block alignment offset - * @q: the request queue for the device - * @offset: alignment offset in bytes - * - * Description: - * Some devices are naturally misaligned to compensate for things like - * the legacy DOS partition table 63-sector offset. Low-level drivers - * should call this function for devices whose first sector is not - * naturally aligned. - */ -void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) -{ - q->limits.alignment_offset = - offset & (q->limits.physical_block_size - 1); - q->limits.misaligned = 0; -} -EXPORT_SYMBOL(blk_queue_alignment_offset); + t->atomic_write_hw_unit_min = min(b->atomic_write_hw_unit_min, + t->atomic_write_hw_unit_max); + t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min); -void disk_update_readahead(struct gendisk *disk) -{ - blk_apply_bdi_limits(disk->bdi, &disk->queue->limits); + return true; } -EXPORT_SYMBOL_GPL(disk_update_readahead); -/** - * blk_limits_io_min - set minimum request size for a device - * @limits: the queue limits - * @min: smallest I/O size in bytes - * - * Description: - * Some devices have an internal block size bigger than the reported - * hardware sector size. This function can be used to signal the - * smallest I/O the device can perform without incurring a performance - * penalty. 
- */ -void blk_limits_io_min(struct queue_limits *limits, unsigned int min) +static void blk_stack_atomic_writes_limits(struct queue_limits *t, + struct queue_limits *b, sector_t start) { - limits->io_min = min; + if (!(b->features & BLK_FEAT_ATOMIC_WRITES)) + goto unsupported; - if (limits->io_min < limits->logical_block_size) - limits->io_min = limits->logical_block_size; + if (!b->atomic_write_hw_unit_min) + goto unsupported; - if (limits->io_min < limits->physical_block_size) - limits->io_min = limits->physical_block_size; -} -EXPORT_SYMBOL(blk_limits_io_min); + if (!blk_atomic_write_start_sect_aligned(start, b)) + goto unsupported; -/** - * blk_queue_io_min - set minimum request size for the queue - * @q: the request queue for the device - * @min: smallest I/O size in bytes - * - * Description: - * Storage devices may report a granularity or preferred minimum I/O - * size which is the smallest request the device can perform without - * incurring a performance penalty. For disk drives this is often the - * physical block size. For RAID arrays it is often the stripe chunk - * size. A properly aligned multiple of minimum_io_size is the - * preferred request size for workloads where a high number of I/O - * operations is desired. - */ -void blk_queue_io_min(struct request_queue *q, unsigned int min) -{ - blk_limits_io_min(&q->limits, min); -} -EXPORT_SYMBOL(blk_queue_io_min); - -/** - * blk_limits_io_opt - set optimal request size for a device - * @limits: the queue limits - * @opt: smallest I/O size in bytes - * - * Description: - * Storage devices may report an optimal I/O size, which is the - * device's preferred unit for sustained I/O. This is rarely reported - * for disk drives. For RAID arrays it is usually the stripe width or - * the internal track size. A properly aligned multiple of - * optimal_io_size is the preferred request size for workloads where - * sustained throughput is desired. - */ -void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt) -{ - limits->io_opt = opt; -} -EXPORT_SYMBOL(blk_limits_io_opt); - -/** - * blk_queue_io_opt - set optimal request size for the queue - * @q: the request queue for the device - * @opt: optimal request size in bytes - * - * Description: - * Storage devices may report an optimal I/O size, which is the - * device's preferred unit for sustained I/O. This is rarely reported - * for disk drives. For RAID arrays it is usually the stripe width or - * the internal track size. A properly aligned multiple of - * optimal_io_size is the preferred request size for workloads where - * sustained throughput is desired. - */ -void blk_queue_io_opt(struct request_queue *q, unsigned int opt) -{ - blk_limits_io_opt(&q->limits, opt); - if (!q->disk) + /* + * If atomic_write_hw_max is set, we have already stacked 1x bottom + * device, so check for compliance. 
+ */ + if (t->atomic_write_hw_max) { + if (!blk_stack_atomic_writes_tail(t, b)) + goto unsupported; return; - q->disk->bdi->ra_pages = - max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); -} -EXPORT_SYMBOL(blk_queue_io_opt); - -static int queue_limit_alignment_offset(const struct queue_limits *lim, - sector_t sector) -{ - unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) - << SECTOR_SHIFT; - - return (granularity + lim->alignment_offset - alignment) % granularity; -} - -static unsigned int queue_limit_discard_alignment( - const struct queue_limits *lim, sector_t sector) -{ - unsigned int alignment, granularity, offset; - - if (!lim->max_discard_sectors) - return 0; - - /* Why are these in bytes, not sectors? */ - alignment = lim->discard_alignment >> SECTOR_SHIFT; - granularity = lim->discard_granularity >> SECTOR_SHIFT; - if (!granularity) - return 0; - - /* Offset of the partition start in 'granularity' sectors */ - offset = sector_div(sector, granularity); - - /* And why do we do this modulus *again* in blkdev_issue_discard()? */ - offset = (granularity + alignment - offset) % granularity; + } - /* Turn it back into bytes, gaah */ - return offset << SECTOR_SHIFT; -} + if (!blk_stack_atomic_writes_head(t, b)) + goto unsupported; + return; -static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs) -{ - sectors = round_down(sectors, lbs >> SECTOR_SHIFT); - if (sectors < PAGE_SIZE >> SECTOR_SHIFT) - sectors = PAGE_SIZE >> SECTOR_SHIFT; - return sectors; +unsupported: + t->atomic_write_hw_max = 0; + t->atomic_write_hw_unit_max = 0; + t->atomic_write_hw_unit_min = 0; + t->atomic_write_hw_boundary = 0; } /** @@ -747,14 +686,30 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, { unsigned int top, bottom, alignment, ret = 0; + t->features |= (b->features & BLK_FEAT_INHERIT_MASK); + + /* + * Some feaures need to be supported both by the stacking driver and all + * underlying devices. The stacking driver sets these flags before + * stacking the limits, and this will clear the flags if any of the + * underlying devices does not support it. + */ + if (!(b->features & BLK_FEAT_NOWAIT)) + t->features &= ~BLK_FEAT_NOWAIT; + if (!(b->features & BLK_FEAT_POLL)) + t->features &= ~BLK_FEAT_POLL; + + t->flags |= (b->flags & BLK_FLAG_MISALIGNED); + t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); + t->max_user_sectors = min_not_zero(t->max_user_sectors, + b->max_user_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors); t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors, b->max_write_zeroes_sectors); - t->max_zone_append_sectors = min(t->max_zone_append_sectors, - b->max_zone_append_sectors); - t->bounce = max(t->bounce, b->bounce); + t->max_hw_zone_append_sectors = min(t->max_hw_zone_append_sectors, + b->max_hw_zone_append_sectors); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); @@ -770,8 +725,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - t->misaligned |= b->misaligned; - alignment = queue_limit_alignment_offset(b, start); /* Bottom device has different alignment. 
Check that it is @@ -785,7 +738,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Verify that top and bottom intervals line up */ if (max(top, bottom) % min(top, bottom)) { - t->misaligned = 1; + t->flags |= BLK_FLAG_MISALIGNED; ret = -1; } } @@ -807,42 +760,38 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Physical block size a multiple of the logical block size? */ if (t->physical_block_size & (t->logical_block_size - 1)) { t->physical_block_size = t->logical_block_size; - t->misaligned = 1; + t->flags |= BLK_FLAG_MISALIGNED; ret = -1; } /* Minimum I/O a multiple of the physical block size? */ if (t->io_min & (t->physical_block_size - 1)) { t->io_min = t->physical_block_size; - t->misaligned = 1; + t->flags |= BLK_FLAG_MISALIGNED; ret = -1; } /* Optimal I/O a multiple of the physical block size? */ if (t->io_opt & (t->physical_block_size - 1)) { t->io_opt = 0; - t->misaligned = 1; + t->flags |= BLK_FLAG_MISALIGNED; ret = -1; } /* chunk_sectors a multiple of the physical block size? */ if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) { t->chunk_sectors = 0; - t->misaligned = 1; + t->flags |= BLK_FLAG_MISALIGNED; ret = -1; } - t->raid_partial_stripes_expensive = - max(t->raid_partial_stripes_expensive, - b->raid_partial_stripes_expensive); - /* Find lowest common alignment_offset */ t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment) % max(t->physical_block_size, t->io_min); /* Verify that new alignment_offset is on a logical block boundary */ if (t->alignment_offset & (t->logical_block_size - 1)) { - t->misaligned = 1; + t->flags |= BLK_FLAG_MISALIGNED; ret = -1; } @@ -854,16 +803,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, if (b->discard_granularity) { alignment = queue_limit_discard_alignment(b, start); - if (t->discard_granularity != 0 && - t->discard_alignment != alignment) { - top = t->discard_granularity + t->discard_alignment; - bottom = b->discard_granularity + alignment; - - /* Verify that top and bottom intervals line up */ - if ((max(top, bottom) % min(top, bottom)) != 0) - t->discard_misaligned = 1; - } - t->max_discard_sectors = min_not_zero(t->max_discard_sectors, b->max_discard_sectors); t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors, @@ -877,11 +816,12 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_secure_erase_sectors); t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); - t->zoned = max(t->zoned, b->zoned); - if (!t->zoned) { + if (!(t->features & BLK_FEAT_ZONED)) { t->zone_write_granularity = 0; t->max_zone_append_sectors = 0; } + blk_stack_atomic_writes_limits(t, b, start); + return ret; } EXPORT_SYMBOL(blk_stack_limits); @@ -904,7 +844,7 @@ EXPORT_SYMBOL(blk_stack_limits); void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx) { - if (blk_stack_limits(t, &bdev_get_queue(bdev)->limits, + if (blk_stack_limits(t, bdev_limits(bdev), get_start_sect(bdev) + offset)) pr_notice("%s: Warning: Device %pg is misaligned\n", pfx, bdev); @@ -912,96 +852,57 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, EXPORT_SYMBOL_GPL(queue_limits_stack_bdev); /** - * blk_queue_update_dma_pad - update pad mask - * @q: the request queue for the device - * @mask: pad mask - * - * Update dma pad mask. 
- * - * Appending pad buffer to a request modifies the last entry of a - * scatter list such that it includes the pad buffer. - **/ -void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) -{ - if (mask > q->dma_pad_mask) - q->dma_pad_mask = mask; -} -EXPORT_SYMBOL(blk_queue_update_dma_pad); - -/** - * blk_queue_segment_boundary - set boundary rules for segment merging - * @q: the request queue for the device - * @mask: the memory boundary mask - **/ -void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) -{ - if (mask < PAGE_SIZE - 1) { - mask = PAGE_SIZE - 1; - pr_info("%s: set to minimum %lx\n", __func__, mask); - } - - q->limits.seg_boundary_mask = mask; -} -EXPORT_SYMBOL(blk_queue_segment_boundary); - -/** - * blk_queue_virt_boundary - set boundary rules for bio merging - * @q: the request queue for the device - * @mask: the memory boundary mask - **/ -void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask) -{ - q->limits.virt_boundary_mask = mask; - - /* - * Devices that require a virtual boundary do not support scatter/gather - * I/O natively, but instead require a descriptor list entry for each - * page (which might not be idential to the Linux PAGE_SIZE). Because - * of that they are not limited by our notion of "segment size". - */ - if (mask) - q->limits.max_segment_size = UINT_MAX; -} -EXPORT_SYMBOL(blk_queue_virt_boundary); - -/** - * blk_queue_dma_alignment - set dma length and memory alignment - * @q: the request queue for the device - * @mask: alignment mask + * queue_limits_stack_integrity - stack integrity profile + * @t: target queue limits + * @b: base queue limits * - * description: - * set required memory and length alignment for direct dma transactions. - * this is used when building direct io requests for the queue. + * Check if the integrity profile in the @b can be stacked into the + * target @t. Stacking is possible if either: * - **/ -void blk_queue_dma_alignment(struct request_queue *q, int mask) -{ - q->limits.dma_alignment = mask; -} -EXPORT_SYMBOL(blk_queue_dma_alignment); - -/** - * blk_queue_update_dma_alignment - update dma length and memory alignment - * @q: the request queue for the device - * @mask: alignment mask + * a) does not have any integrity information stacked into it yet + * b) the integrity profile in @b is identical to the one in @t * - * description: - * update required memory and length alignment for direct dma transactions. - * If the requested alignment is larger than the current alignment, then - * the current queue alignment is updated to the new value, otherwise it - * is left alone. The design of this is to allow multiple objects - * (driver, device, transport etc) to set their respective - * alignments without having them interfere. - * - **/ -void blk_queue_update_dma_alignment(struct request_queue *q, int mask) -{ - BUG_ON(mask > PAGE_SIZE); + * If @b can be stacked into @t, return %true. Else return %false and clear the + * integrity information in @t. 
+ */ +bool queue_limits_stack_integrity(struct queue_limits *t, + struct queue_limits *b) +{ + struct blk_integrity *ti = &t->integrity; + struct blk_integrity *bi = &b->integrity; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return true; + + if (ti->flags & BLK_INTEGRITY_STACKED) { + if (ti->tuple_size != bi->tuple_size) + goto incompatible; + if (ti->interval_exp != bi->interval_exp) + goto incompatible; + if (ti->tag_size != bi->tag_size) + goto incompatible; + if (ti->csum_type != bi->csum_type) + goto incompatible; + if ((ti->flags & BLK_INTEGRITY_REF_TAG) != + (bi->flags & BLK_INTEGRITY_REF_TAG)) + goto incompatible; + } else { + ti->flags = BLK_INTEGRITY_STACKED; + ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | + (bi->flags & BLK_INTEGRITY_REF_TAG); + ti->csum_type = bi->csum_type; + ti->tuple_size = bi->tuple_size; + ti->pi_offset = bi->pi_offset; + ti->interval_exp = bi->interval_exp; + ti->tag_size = bi->tag_size; + } + return true; - if (mask > q->limits.dma_alignment) - q->limits.dma_alignment = mask; +incompatible: + memset(ti, 0, sizeof(*ti)); + return false; } -EXPORT_SYMBOL(blk_queue_update_dma_alignment); +EXPORT_SYMBOL_GPL(queue_limits_stack_integrity); /** * blk_set_queue_depth - tell the block layer about the device queue depth @@ -1016,92 +917,11 @@ void blk_set_queue_depth(struct request_queue *q, unsigned int depth) } EXPORT_SYMBOL(blk_set_queue_depth); -/** - * blk_queue_write_cache - configure queue's write cache - * @q: the request queue for the device - * @wc: write back cache on or off - * @fua: device supports FUA writes, if true - * - * Tell the block layer about the write cache of @q. - */ -void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) -{ - if (wc) { - blk_queue_flag_set(QUEUE_FLAG_HW_WC, q); - blk_queue_flag_set(QUEUE_FLAG_WC, q); - } else { - blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q); - blk_queue_flag_clear(QUEUE_FLAG_WC, q); - } - if (fua) - blk_queue_flag_set(QUEUE_FLAG_FUA, q); - else - blk_queue_flag_clear(QUEUE_FLAG_FUA, q); -} -EXPORT_SYMBOL_GPL(blk_queue_write_cache); - -/** - * blk_queue_required_elevator_features - Set a queue required elevator features - * @q: the request queue for the target device - * @features: Required elevator features OR'ed together - * - * Tell the block layer that for the device controlled through @q, only the - * only elevators that can be used are those that implement at least the set of - * features specified by @features. - */ -void blk_queue_required_elevator_features(struct request_queue *q, - unsigned int features) -{ - q->required_elevator_features = features; -} -EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features); - -/** - * blk_queue_can_use_dma_map_merging - configure queue for merging segments. - * @q: the request queue for the device - * @dev: the device pointer for dma - * - * Tell the block layer about merging the segments by dma map of @q. - */ -bool blk_queue_can_use_dma_map_merging(struct request_queue *q, - struct device *dev) -{ - unsigned long boundary = dma_get_merge_boundary(dev); - - if (!boundary) - return false; - - /* No need to update max_segment_size. 
see blk_queue_virt_boundary() */ - blk_queue_virt_boundary(q, boundary); - - return true; -} -EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); - -/** - * disk_set_zoned - inidicate a zoned device - * @disk: gendisk to configure - */ -void disk_set_zoned(struct gendisk *disk) -{ - struct request_queue *q = disk->queue; - - WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)); - - /* - * Set the zone write granularity to the device logical block - * size by default. The driver can change this value if needed. - */ - q->limits.zoned = true; - blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); -} -EXPORT_SYMBOL_GPL(disk_set_zoned); - int bdev_alignment_offset(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (q->limits.misaligned) + if (q->limits.flags & BLK_FLAG_MISALIGNED) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, |
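
The bulk of the removals above are the old per-field setters (blk_queue_max_hw_sectors(), blk_queue_logical_block_size(), blk_queue_io_opt(), blk_queue_write_cache(), ...). Drivers instead stage a complete struct queue_limits and commit it in one validated step. A hypothetical fragment sketching that pattern with the queue_limits_commit_update_frozen() helper added here — the example_ name and the literal values are illustrative, not from this patch:

static int example_update_limits(struct gendisk *disk)
{
	struct queue_limits lim;

	/* Locks the limits and returns a snapshot to edit. */
	lim = queue_limits_start_update(disk->queue);

	/* Stage the new values; nothing is visible to the queue yet. */
	lim.logical_block_size = 4096;
	lim.physical_block_size = 4096;
	lim.max_hw_sectors = 2048;		/* 1 MiB */
	lim.io_opt = 256 * 1024;
	lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;

	/*
	 * Validates the limits, freezes the queue, applies them and
	 * unfreezes, dropping limits_lock in all cases.
	 */
	return queue_limits_commit_update_frozen(disk->queue, &lim);
}

Because blk_validate_limits() runs on the staged copy, an invalid combination is rejected before anything reaches q->limits, so no half-updated state is ever observable.
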
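Stacking drivers (md, dm and friends) follow the same model: start from blk_set_stacking_limits(), fold in every bottom device, then apply the result. A minimal sketch, assuming the top-level gendisk is still being set up so no I/O is in flight; the example_ name and the zero partition offset are illustrative only:

static int example_stack_limits(struct gendisk *top,
				struct block_device **bdevs, int nr)
{
	struct queue_limits lim;
	int i;

	/* Start permissive; stacking only ever tightens the limits. */
	blk_set_stacking_limits(&lim);

	for (i = 0; i < nr; i++)
		queue_limits_stack_bdev(&lim, bdevs[i], 0, top->disk_name);

	/*
	 * blk_stack_limits() clears e.g. BLK_FEAT_NOWAIT and BLK_FEAT_POLL
	 * and drops the atomic-write limits when any bottom device lacks
	 * support, so the result reflects the weakest component.
	 */
	return queue_limits_set(top->queue, &lim);
}
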