Diffstat (limited to 'block/blk-settings.c')
 -rw-r--r--  block/blk-settings.c | 159
 1 file changed, 124 insertions(+), 35 deletions(-)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index a000daafbfb4..07874e9b609f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -14,6 +14,8 @@
#include <linux/jiffies.h>
#include <linux/gfp.h>
#include <linux/dma-mapping.h>
+#include <linux/t10-pi.h>
+#include <linux/crc64.h>
#include "blk.h"
#include "blk-rq-qos.h"
@@ -50,6 +52,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_sectors = UINT_MAX;
lim->max_dev_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
+ lim->max_hw_wzeroes_unmap_sectors = UINT_MAX;
+ lim->max_user_wzeroes_unmap_sectors = UINT_MAX;
lim->max_hw_zone_append_sectors = UINT_MAX;
lim->max_user_discard_sectors = UINT_MAX;
}
@@ -58,16 +62,24 @@ EXPORT_SYMBOL(blk_set_stacking_limits);
void blk_apply_bdi_limits(struct backing_dev_info *bdi,
struct queue_limits *lim)
{
+ u64 io_opt = lim->io_opt;
+
/*
* For read-ahead of large files to be effective, we need to read ahead
- * at least twice the optimal I/O size.
+ * at least twice the optimal I/O size. For rotational devices that do
+ * not report an optimal I/O size (e.g. ATA HDDs), use the maximum I/O
+ * size to avoid falling back to the (rather inefficient) small default
+ * read-ahead size.
*
* There is no hardware limitation for the read-ahead size and the user
* might have increased the read-ahead size through sysfs, so don't ever
* decrease it.
*/
+ if (!io_opt && (lim->features & BLK_FEAT_ROTATIONAL))
+ io_opt = (u64)lim->max_sectors << SECTOR_SHIFT;
+
bdi->ra_pages = max3(bdi->ra_pages,
- lim->io_opt * 2 / PAGE_SIZE,
+ io_opt * 2 >> PAGE_SHIFT,
VM_READAHEAD_PAGES);
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
}
@@ -114,7 +126,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
{
struct blk_integrity *bi = &lim->integrity;
- if (!bi->tuple_size) {
+ if (!bi->metadata_size) {
if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE ||
bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) {
pr_warn("invalid PI settings.\n");
@@ -135,6 +147,42 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
return -EINVAL;
}
+ if (bi->pi_tuple_size > bi->metadata_size) {
+ pr_warn("pi_tuple_size (%u) exceeds metadata_size (%u)\n",
+ bi->pi_tuple_size,
+ bi->metadata_size);
+ return -EINVAL;
+ }
+
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_NONE:
+ if (bi->pi_tuple_size) {
+ pr_warn("pi_tuple_size must be 0 when checksum type \
+ is none\n");
+ return -EINVAL;
+ }
+ break;
+ case BLK_INTEGRITY_CSUM_CRC:
+ case BLK_INTEGRITY_CSUM_IP:
+ if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) {
+ pr_warn("pi_tuple_size mismatch for T10 PI: expected \
+ %zu, got %u\n",
+ sizeof(struct t10_pi_tuple),
+ bi->pi_tuple_size);
+ return -EINVAL;
+ }
+ break;
+ case BLK_INTEGRITY_CSUM_CRC64:
+ if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) {
+ pr_warn("pi_tuple_size mismatch for CRC64 PI: \
+ expected %zu, got %u\n",
+ sizeof(struct crc64_pi_tuple),
+ bi->pi_tuple_size);
+ return -EINVAL;
+ }
+ break;
+ }
+
if (!bi->interval_exp)
bi->interval_exp = ilog2(lim->logical_block_size);
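
For reference, a sketch of the two PI tuple layouts the new switch checks
against, mirroring the linux/t10-pi.h structures (8 and 16 bytes), plus a
hypothetical 4K+64B CRC64 format walked through the validation:

    #include <stdio.h>
    #include <stdint.h>

    /* Layouts mirroring linux/t10-pi.h (big-endian fields in the kernel). */
    struct t10_pi_tuple {
        uint16_t guard_tag;   /* CRC16 or IP checksum */
        uint16_t app_tag;
        uint32_t ref_tag;
    };                        /* 8 bytes */

    struct crc64_pi_tuple {
        uint64_t guard_tag;   /* CRC64 */
        uint16_t app_tag;
        uint8_t  ref_tag[6];
    };                        /* 16 bytes */

    int main(void)
    {
        /* Hypothetical 4K+64B format with 16-byte CRC64 PI */
        unsigned metadata_size = 64, pi_tuple_size = 16;

        printf("sizeof t10_pi_tuple = %zu, crc64_pi_tuple = %zu\n",
               sizeof(struct t10_pi_tuple), sizeof(struct crc64_pi_tuple));

        if (pi_tuple_size > metadata_size)
            puts("rejected: PI tuple does not fit in the metadata");
        else if (pi_tuple_size != sizeof(struct crc64_pi_tuple))
            puts("rejected: wrong tuple size for CRC64 PI");
        else
            puts("accepted");
        return 0;
    }
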
@@ -181,6 +229,8 @@ static void blk_atomic_writes_update_limits(struct queue_limits *lim)
static void blk_validate_atomic_write_limits(struct queue_limits *lim)
{
unsigned int boundary_sectors;
+ unsigned int atomic_write_hw_max_sectors =
+ lim->atomic_write_hw_max >> SECTOR_SHIFT;
if (!(lim->features & BLK_FEAT_ATOMIC_WRITES))
goto unsupported;
@@ -202,6 +252,10 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
lim->atomic_write_hw_max))
goto unsupported;
+ if (WARN_ON_ONCE(lim->chunk_sectors &&
+ atomic_write_hw_max_sectors > lim->chunk_sectors))
+ goto unsupported;
+
boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
if (boundary_sectors) {
@@ -266,8 +320,12 @@ int blk_validate_limits(struct queue_limits *lim)
pr_warn("Invalid logical block size (%d)\n", lim->logical_block_size);
return -EINVAL;
}
- if (lim->physical_block_size < lim->logical_block_size)
+ if (lim->physical_block_size < lim->logical_block_size) {
lim->physical_block_size = lim->logical_block_size;
+ } else if (!is_power_of_2(lim->physical_block_size)) {
+ pr_warn("Invalid physical block size (%d)\n", lim->physical_block_size);
+ return -EINVAL;
+ }
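
A minimal sketch of the new physical-block-size handling with hypothetical
values: sizes below the logical block size are still rounded up, but a
non-power-of-2 value is now rejected instead of being silently kept:

    #include <stdio.h>

    static int is_power_of_2(unsigned long n)
    {
        return n != 0 && (n & (n - 1)) == 0;
    }

    int main(void)
    {
        unsigned lbs = 512;
        unsigned pbs[] = { 256, 4096, 3072 };

        for (int i = 0; i < 3; i++) {
            if (pbs[i] < lbs)
                printf("%u -> rounded up to %u\n", pbs[i], lbs);
            else if (!is_power_of_2(pbs[i]))
                printf("%u -> rejected (-EINVAL)\n", pbs[i]);
            else
                printf("%u -> accepted\n", pbs[i]);
        }
        return 0;
    }
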
/*
* The minimum I/O size defaults to the physical block size unless
@@ -333,15 +391,28 @@ int blk_validate_limits(struct queue_limits *lim)
if (!lim->max_segments)
lim->max_segments = BLK_MAX_SEGMENTS;
+ if (lim->max_hw_wzeroes_unmap_sectors &&
+ lim->max_hw_wzeroes_unmap_sectors != lim->max_write_zeroes_sectors)
+ return -EINVAL;
+ lim->max_wzeroes_unmap_sectors = min(lim->max_hw_wzeroes_unmap_sectors,
+ lim->max_user_wzeroes_unmap_sectors);
+
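
The rule encoded above: a device either cannot unmap on Write Zeroes (hw
limit 0) or must support unmap for the full write-zeroes range. A small
sketch with hypothetical limits:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical limits, in sectors */
        unsigned max_write_zeroes = 0xffff;
        unsigned hw_unmap = 0xffff;   /* must be 0 or == max_write_zeroes */
        unsigned user_unmap = ~0u;    /* default: no user cap */

        if (hw_unmap && hw_unmap != max_write_zeroes) {
            puts("rejected: -EINVAL");
            return 1;
        }

        unsigned unmap = hw_unmap < user_unmap ? hw_unmap : user_unmap;
        printf("max_wzeroes_unmap_sectors = %u\n", unmap);  /* 65535 */
        return 0;
    }
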
lim->max_discard_sectors =
min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors);
+ /*
+ * When discard is not supported, discard_granularity should be reported
+ * as 0 to userspace.
+ */
+ if (lim->max_discard_sectors)
+ lim->discard_granularity =
+ max(lim->discard_granularity, lim->physical_block_size);
+ else
+ lim->discard_granularity = 0;
+
if (!lim->max_discard_segments)
lim->max_discard_segments = 1;
- if (lim->discard_granularity < lim->physical_block_size)
- lim->discard_granularity = lim->physical_block_size;
-
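
A sketch of the effect for a device without discard support (hypothetical
values): the granularity is now reported as 0 through sysfs rather than
being rounded up to the physical block size:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical device that cannot discard at all */
        unsigned max_hw_discard_sectors = 0, max_user_discard_sectors = ~0u;
        unsigned physical_block_size = 4096, discard_granularity = 512;

        unsigned max_discard_sectors =
            max_hw_discard_sectors < max_user_discard_sectors ?
            max_hw_discard_sectors : max_user_discard_sectors;

        if (max_discard_sectors)
            discard_granularity = discard_granularity > physical_block_size ?
                                  discard_granularity : physical_block_size;
        else
            discard_granularity = 0;  /* reported as 0, not 4096 */

        printf("discard_granularity = %u\n", discard_granularity);
        return 0;
    }
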
/*
* By default there is no limit on the segment boundary alignment,
* but if there is one it can't be smaller than the page size as
@@ -418,10 +489,11 @@ int blk_set_default_limits(struct queue_limits *lim)
{
/*
* Most defaults are set by capping the bounds in blk_validate_limits,
- * but max_user_discard_sectors is special and needs an explicit
- * initialization to the max value here.
+ * but these limits are special and need an explicit initialization to
+ * the max value here.
*/
lim->max_user_discard_sectors = UINT_MAX;
+ lim->max_user_wzeroes_unmap_sectors = UINT_MAX;
return blk_validate_limits(lim);
}
@@ -589,41 +661,50 @@ static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t,
return true;
}
-
-/* Check stacking of first bottom device */
-static bool blk_stack_atomic_writes_head(struct queue_limits *t,
- struct queue_limits *b)
+static void blk_stack_atomic_writes_chunk_sectors(struct queue_limits *t)
{
- if (b->atomic_write_hw_boundary &&
- !blk_stack_atomic_writes_boundary_head(t, b))
- return false;
+ unsigned int chunk_bytes;
- if (t->io_min <= SECTOR_SIZE) {
- /* No chunk sectors, so use bottom device values directly */
- t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
- t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
- t->atomic_write_hw_max = b->atomic_write_hw_max;
- return true;
- }
+ if (!t->chunk_sectors)
+ return;
+
+ /*
+ * If chunk sectors is so large that its value in bytes overflows
+ * UINT_MAX, then just shift it down so it definitely will fit.
+ * We don't support atomic writes of such a large size anyway.
+ */
+ if (check_shl_overflow(t->chunk_sectors, SECTOR_SHIFT, &chunk_bytes))
+ chunk_bytes = t->chunk_sectors;
/*
* Find values for limits which work for chunk size.
* b->atomic_write_hw_unit_{min, max} may not be aligned with chunk
- * size (t->io_min), as chunk size is not restricted to a power-of-2.
+ * size, as the chunk size is not restricted to a power-of-2.
* So we need to find highest power-of-2 which works for the chunk
* size.
- * As an example scenario, we could have b->unit_max = 16K and
- * t->io_min = 24K. For this case, reduce t->unit_max to a value
- * aligned with both limits, i.e. 8K in this example.
+ * As an example scenario, we could have t->unit_max = 16K and a
+ * chunk size (chunk_bytes) of 24K. For this case, reduce t->unit_max
+ * to a value aligned with both limits, i.e. 8K in this example.
*/
- t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
- while (t->io_min % t->atomic_write_hw_unit_max)
- t->atomic_write_hw_unit_max /= 2;
+ t->atomic_write_hw_unit_max = min(t->atomic_write_hw_unit_max,
+ max_pow_of_two_factor(chunk_bytes));
- t->atomic_write_hw_unit_min = min(b->atomic_write_hw_unit_min,
+ t->atomic_write_hw_unit_min = min(t->atomic_write_hw_unit_min,
t->atomic_write_hw_unit_max);
- t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min);
+ t->atomic_write_hw_max = min(t->atomic_write_hw_max, chunk_bytes);
+}
+
+/* Check stacking of first bottom device */
+static bool blk_stack_atomic_writes_head(struct queue_limits *t,
+ struct queue_limits *b)
+{
+ if (b->atomic_write_hw_boundary &&
+ !blk_stack_atomic_writes_boundary_head(t, b))
+ return false;
+ t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
+ t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
+ t->atomic_write_hw_max = b->atomic_write_hw_max;
return true;
}
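
A worked example of the 16K/24K case from the comment above. The helper is
reimplemented here for illustration (x & -x yields the largest power-of-2
dividing x, which is what the kernel's max_pow_of_two_factor() computes):

    #include <stdio.h>
    #include <stdint.h>

    /* Largest power-of-2 factor of x; illustrative reimplementation. */
    static uint32_t max_pow_of_two_factor(uint32_t x)
    {
        return x & -x;
    }

    int main(void)
    {
        uint32_t unit_max = 16 * 1024;     /* bottom device: 16K */
        uint32_t chunk_bytes = 24 * 1024;  /* stripe chunk: 24K */
        uint32_t factor = max_pow_of_two_factor(chunk_bytes);  /* 8K */

        unit_max = unit_max < factor ? unit_max : factor;
        printf("unit_max = %uK\n", unit_max / 1024);  /* 8K */
        return 0;
    }
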
@@ -651,6 +732,7 @@ static void blk_stack_atomic_writes_limits(struct queue_limits *t,
if (!blk_stack_atomic_writes_head(t, b))
goto unsupported;
+ blk_stack_atomic_writes_chunk_sectors(t);
return;
unsupported:
@@ -708,6 +790,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
b->max_write_zeroes_sectors);
+ t->max_user_wzeroes_unmap_sectors =
+ min(t->max_user_wzeroes_unmap_sectors,
+ b->max_user_wzeroes_unmap_sectors);
+ t->max_hw_wzeroes_unmap_sectors =
+ min(t->max_hw_wzeroes_unmap_sectors,
+ b->max_hw_wzeroes_unmap_sectors);
+
t->max_hw_zone_append_sectors = min(t->max_hw_zone_append_sectors,
b->max_hw_zone_append_sectors);
@@ -779,7 +868,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
}
/* chunk_sectors a multiple of the physical block size? */
- if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
+ if (t->chunk_sectors % (t->physical_block_size >> SECTOR_SHIFT)) {
t->chunk_sectors = 0;
t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
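
The reworked alignment check in sector units, as a quick sketch with
hypothetical values (4096-byte physical blocks span 8 sectors):

    #include <stdio.h>

    int main(void)
    {
        unsigned pbs_sectors = 4096 >> 9;  /* 8 sectors per physical block */
        unsigned chunks[] = { 1024, 1028 };

        for (int i = 0; i < 2; i++) {
            if (chunks[i] % pbs_sectors)
                printf("%u: misaligned, chunk_sectors cleared\n", chunks[i]);
            else
                printf("%u: aligned\n", chunks[i]);
        }
        return 0;
    }
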
@@ -875,7 +964,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
return true;
if (ti->flags & BLK_INTEGRITY_STACKED) {
- if (ti->tuple_size != bi->tuple_size)
+ if (ti->metadata_size != bi->metadata_size)
goto incompatible;
if (ti->interval_exp != bi->interval_exp)
goto incompatible;
@@ -891,7 +980,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) |
(bi->flags & BLK_INTEGRITY_REF_TAG);
ti->csum_type = bi->csum_type;
- ti->tuple_size = bi->tuple_size;
+ ti->metadata_size = bi->metadata_size;
ti->pi_offset = bi->pi_offset;
ti->interval_exp = bi->interval_exp;
ti->tag_size = bi->tag_size;