summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/bio.c 2
-rw-r--r-- block/blk-merge.c 9
-rw-r--r-- block/blk-settings.c 14
-rw-r--r-- block/blk-zoned.c 76
-rw-r--r-- block/blk.h 9
-rw-r--r-- block/partitions/mac.c 18
6 files changed, 109 insertions, 19 deletions
diff --git a/block/bio.c b/block/bio.c
index f0c416e5931d..6ac5983ba51e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -77,7 +77,7 @@ struct bio_slab {
struct kmem_cache *slab;
unsigned int slab_ref;
unsigned int slab_size;
- char name[8];
+ char name[12];
};
static DEFINE_MUTEX(bio_slab_lock);
static DEFINE_XARRAY(bio_slabs);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 15cd231d560c..1d1589c35297 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -270,7 +270,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
unsigned max_segs, unsigned max_bytes)
{
- unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
+ unsigned max_len = max_bytes - *bytes;
unsigned len = min(bv->bv_len, max_len);
unsigned total_len = 0;
unsigned seg_size = 0;
@@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
if (nsegs < lim->max_segments &&
bytes + bv.bv_len <= max_bytes &&
- bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+ bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
nsegs++;
bytes += bv.bv_len;
} else {
@@ -556,11 +556,14 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
{
struct req_iterator iter = {
.bio = rq->bio,
- .iter = rq->bio->bi_iter,
};
struct phys_vec vec;
int nsegs = 0;
+ /* the internal flush request may not have bio attached */
+ if (iter.bio)
+ iter.iter = iter.bio->bi_iter;
+
while (blk_map_iter_next(rq, &iter, &vec)) {
*last_sg = blk_next_sg(last_sg, sglist);
sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c44dadc35e1e..b9c6f0ec1c49 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim)
{
unsigned int max_hw_sectors;
unsigned int logical_block_sectors;
+ unsigned long seg_size;
int err;
/*
@@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim)
max_hw_sectors = min_not_zero(lim->max_hw_sectors,
lim->max_dev_sectors);
if (lim->max_user_sectors) {
- if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+ if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE)
return -EINVAL;
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
} else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
@@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim)
*/
if (!lim->seg_boundary_mask)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
+ if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1))
return -EINVAL;
/*
@@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim)
*/
if (!lim->max_segment_size)
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
- if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
+ if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE))
return -EINVAL;
}
+ /* setup min segment size for building new segment in fast path */
+ if (lim->seg_boundary_mask > lim->max_segment_size - 1)
+ seg_size = lim->max_segment_size;
+ else
+ seg_size = lim->seg_boundary_mask + 1;
+ lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE);
+
/*
* We require drivers to at least do logical block aligned I/O, but
* historically could not check for that due to the separate calls
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 761ea662ddc3..0c77244a35c9 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
}
}
hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
+ atomic_inc(&disk->nr_zone_wplugs);
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
return true;
}
-static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
- sector_t sector)
+static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
+ sector_t sector)
{
unsigned int zno = disk_zone_no(disk, sector);
unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
@@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
return NULL;
}
+static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
+ sector_t sector)
+{
+ if (!atomic_read(&disk->nr_zone_wplugs))
+ return NULL;
+
+ return disk_get_hashed_zone_wplug(disk, sector);
+}
+
static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
{
struct blk_zone_wplug *zwplug =
@@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk,
zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
hlist_del_init_rcu(&zwplug->node);
+ atomic_dec(&disk->nr_zone_wplugs);
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
disk_put_zone_wplug(zwplug);
}
@@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
{
struct bio *bio;
+ if (bio_list_empty(&zwplug->bio_list))
+ return;
+
+ pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
+ zwplug->disk->disk_name, zwplug->zone_no);
while ((bio = bio_list_pop(&zwplug->bio_list)))
blk_zone_wplug_bio_io_error(zwplug, bio);
}
@@ -1040,6 +1056,47 @@ plug:
return true;
}
+static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+ struct blk_zone_wplug *zwplug;
+ unsigned long flags;
+
+ /*
+ * We have native support for zone append operations, so we are not
+ * going to handle @bio through plugging. However, we may already have a
+ * zone write plug for the target zone if that zone was previously
+ * partially written using regular writes. In such case, we risk leaving
+ * the plug in the disk hash table if the zone is fully written using
+ * zone append operations. Avoid this by removing the zone write plug.
+ */
+ zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
+ if (likely(!zwplug))
+ return;
+
+ spin_lock_irqsave(&zwplug->lock, flags);
+
+ /*
+ * We are about to remove the zone write plug. But if the user
+ * (mistakenly) has issued regular writes together with native zone
+ * append, we must abort the writes as otherwise the plugged BIOs would
+ * not be executed by the plug BIO work as disk_get_zone_wplug() will
+ * return NULL after the plug is removed. Aborting the plugged write
+ * BIOs is consistent with the fact that these writes will most likely
+ * fail anyway as there is no ordering guarantees between zone append
+ * operations and regular write operations.
+ */
+ if (!bio_list_empty(&zwplug->bio_list)) {
+ pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n",
+ disk->disk_name, zwplug->zone_no);
+ disk_zone_wplug_abort(zwplug);
+ }
+ disk_remove_zone_wplug(disk, zwplug);
+ spin_unlock_irqrestore(&zwplug->lock, flags);
+
+ disk_put_zone_wplug(zwplug);
+}
+
/**
* blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
* @bio: The BIO being submitted
@@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
*/
switch (bio_op(bio)) {
case REQ_OP_ZONE_APPEND:
- if (!bdev_emulates_zone_append(bdev))
+ if (!bdev_emulates_zone_append(bdev)) {
+ blk_zone_wplug_handle_native_zone_append(bio);
return false;
+ }
fallthrough;
case REQ_OP_WRITE:
case REQ_OP_WRITE_ZEROES:
@@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
{
unsigned int i;
+ atomic_set(&disk->nr_zone_wplugs, 0);
disk->zone_wplugs_hash_bits =
min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);
@@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
}
}
+ WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
kfree(disk->zone_wplugs_hash);
disk->zone_wplugs_hash = NULL;
disk->zone_wplugs_hash_bits = 0;
@@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
}
/*
- * We need to track the write pointer of all zones that are not
- * empty nor full. So make sure we have a zone write plug for
- * such zone if the device has a zone write plug hash table.
+ * If the device needs zone append emulation, we need to track the
+ * write pointer of all zones that are neither empty nor full. So make
+ * sure we have a zone write plug for each such zone if the device has
+ * a zone write plug hash table.
*/
- if (!disk->zone_wplugs_hash)
+ if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash)
return 0;
disk_zone_wplug_sync_wp_offset(disk, zone);
diff --git a/block/blk.h b/block/blk.h
index 90fa5f28ccab..9cf9a0099416 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,6 +14,7 @@
struct elevator_type;
#define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9)
+#define BLK_MIN_SEGMENT_SIZE 4096
/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT (5 * HZ)
@@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio,
static inline bool bio_may_need_split(struct bio *bio,
const struct queue_limits *lim)
{
- return lim->chunk_sectors || bio->bi_vcnt != 1 ||
- bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+ if (lim->chunk_sectors)
+ return true;
+ if (bio->bi_vcnt != 1)
+ return true;
+ return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset >
+ lim->min_segment_size;
}
/**
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index c80183156d68..b02530d98629 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -53,13 +53,25 @@ int mac_partition(struct parsed_partitions *state)
}
secsize = be16_to_cpu(md->block_size);
put_dev_sector(sect);
+
+ /*
+ * If the "block size" is not a power of 2, things get weird - we might
+ * end up with a partition straddling a sector boundary, so we wouldn't
+ * be able to read a partition entry with read_part_sector().
+ * Real block sizes are probably (?) powers of two, so just require
+ * that.
+ */
+ if (!is_power_of_2(secsize))
+ return -1;
datasize = round_down(secsize, 512);
data = read_part_sector(state, datasize / 512, &sect);
if (!data)
return -1;
partoffset = secsize % 512;
- if (partoffset + sizeof(*part) > datasize)
+ if (partoffset + sizeof(*part) > datasize) {
+ put_dev_sector(sect);
return -1;
+ }
part = (struct mac_partition *) (data + partoffset);
if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
put_dev_sector(sect);
@@ -112,8 +124,8 @@ int mac_partition(struct parsed_partitions *state)
int i, l;
goodness++;
- l = strlen(part->name);
- if (strcmp(part->name, "/") == 0)
+ l = strnlen(part->name, sizeof(part->name));
+ if (strncmp(part->name, "/", sizeof(part->name)) == 0)
goodness++;
for (i = 0; i <= l - 4; ++i) {
if (strncasecmp(part->name + i, "root",