From 052618c71c66d5de5e9b6cbcbad26932d951919c Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sun, 4 Feb 2024 12:31:42 -0300 Subject: block: rbd: make rbd_bus_type const Now that the driver core can properly handle constant struct bus_type, move the rbd_bus_type variable to be a constant structure as well, placing it into read-only memory which cannot be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/20240204-bus_cleanup-block-v1-1-fc77afd8d7cc@marliere.net Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 12b5d53ec856..00ca8a1d8c46 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -575,7 +575,7 @@ static const struct attribute_group rbd_bus_group = { }; __ATTRIBUTE_GROUPS(rbd_bus); -static struct bus_type rbd_bus_type = { +static const struct bus_type rbd_bus_type = { .name = "rbd", .bus_groups = rbd_bus_groups, }; -- cgit From 14509b748ff58df3f0980b1cd70ade0e4a805e99 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 30 Jan 2024 13:21:34 +0900 Subject: null_blk: add configfs variable shared_tags Allow setting shared_tags through configfs, which previously could only be set as a module parameter. For that purpose, delay tag_set initialization from null_init() to null_add_dev(). Use tag_set.ops as the flag to check whether tag_set is initialized. The following parameters cannot be set through configfs yet: timeout, requeue, init_hctx Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240130042134.2463659-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 38 ++++++++++++++++++++------------------ drivers/block/null_blk/null_blk.h | 1 + 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 36755f263e8e..4281371c81fe 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -165,8 +165,8 @@ static bool g_blocking; module_param_named(blocking, g_blocking, bool, 0444); MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); -static bool shared_tags; -module_param(shared_tags, bool, 0444); +static bool g_shared_tags; +module_param_named(shared_tags, g_shared_tags, bool, 0444); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); static bool g_shared_tag_bitmap; @@ -426,6 +426,7 @@ NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); NULLB_DEVICE_ATTR(no_sched, bool, NULL); +NULLB_DEVICE_ATTR(shared_tags, bool, NULL); NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) @@ -571,6 +572,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_offline, &nullb_device_attr_virt_boundary, &nullb_device_attr_no_sched, + &nullb_device_attr_shared_tags, &nullb_device_attr_shared_tag_bitmap, NULL, }; @@ -653,10 +655,11 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page) "badblocks,blocking,blocksize,cache_size," "completion_nsec,discard,home_node,hw_queue_depth," "irqmode,max_sectors,mbps,memory_backed,no_sched," 
- "poll_queues,power,queue_mode,shared_tag_bitmap,size," - "submit_queues,use_per_node_hctx,virt_boundary,zoned," - "zone_capacity,zone_max_active,zone_max_open," - "zone_nr_conv,zone_offline,zone_readonly,zone_size\n"); + "poll_queues,power,queue_mode,shared_tag_bitmap," + "shared_tags,size,submit_queues,use_per_node_hctx," + "virt_boundary,zoned,zone_capacity,zone_max_active," + "zone_max_open,zone_nr_conv,zone_offline,zone_readonly," + "zone_size\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -738,6 +741,7 @@ static struct nullb_device *null_alloc_dev(void) dev->zone_max_active = g_zone_max_active; dev->virt_boundary = g_virt_boundary; dev->no_sched = g_no_sched; + dev->shared_tags = g_shared_tags; dev->shared_tag_bitmap = g_shared_tag_bitmap; return dev; } @@ -2124,7 +2128,14 @@ static int null_add_dev(struct nullb_device *dev) goto out_free_nullb; if (dev->queue_mode == NULL_Q_MQ) { - if (shared_tags) { + if (dev->shared_tags) { + if (!tag_set.ops) { + rv = null_init_tag_set(NULL, &tag_set); + if (rv) { + tag_set.ops = NULL; + goto out_cleanup_queues; + } + } nullb->tag_set = &tag_set; rv = 0; } else { @@ -2311,18 +2322,12 @@ static int __init null_init(void) g_submit_queues = 1; } - if (g_queue_mode == NULL_Q_MQ && shared_tags) { - ret = null_init_tag_set(NULL, &tag_set); - if (ret) - return ret; - } - config_group_init(&nullb_subsys.su_group); mutex_init(&nullb_subsys.su_mutex); ret = configfs_register_subsystem(&nullb_subsys); if (ret) - goto err_tagset; + return ret; mutex_init(&lock); @@ -2349,9 +2354,6 @@ err_dev: unregister_blkdev(null_major, "nullb"); err_conf: configfs_unregister_subsystem(&nullb_subsys); -err_tagset: - if (g_queue_mode == NULL_Q_MQ && shared_tags) - blk_mq_free_tag_set(&tag_set); return ret; } @@ -2370,7 +2372,7 @@ static void __exit null_exit(void) } mutex_unlock(&lock); - if (g_queue_mode == NULL_Q_MQ && shared_tags) + if (tag_set.ops) blk_mq_free_tag_set(&tag_set); } diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 929f659dd255..7bcfc0922ae8 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -119,6 +119,7 @@ struct nullb_device { bool zoned; /* if device is zoned */ bool virt_boundary; /* virtual boundary on/off for the device */ bool no_sched; /* no IO scheduler for the device */ + bool shared_tags; /* share tag set between devices for blk-mq */ bool shared_tag_bitmap; /* use hostwide shared tags */ }; -- cgit From 7789bf05529889a39bcf4cd17a68521de063b88b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:59:07 +0100 Subject: floppy: fix function pointer cast warnings clang-16 complains about a control flow integrity (kcfi) violation casting between incompatible pointers: drivers/block/floppy.c:2001:11: error: cast from 'void (*)(void)' to 'done_f' (aka 'void (*)(int)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 2001 | .done = (done_f)empty | ^~~~~~~~~~~~~ Just add another empty function with the correct prototype as a workaround. The warning is for code that was added before the start of the normal git history, but I tracked it done to an early change in the reconstructed linux-history.git. 
Fixes: 598a477afe06 ("Import 1.1.41") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213095918.455478-1-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d0e41d52d6a9..2ba0ba135951 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -530,14 +530,13 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); /* this is called after the interrupt of the * main command */ void (*redo)(void); /* this is called to retry the operation */ void (*error)(void); /* this is called to tally an error */ - done_f done; /* this is called to say if the operation has + void (*done)(int); /* this is called to say if the operation has * succeeded/failed */ } *cont; @@ -985,6 +984,10 @@ static void empty(void) { } +static void empty_done(int result) +{ +} + static void (*floppy_work_fn)(void); static void floppy_work_workfn(struct work_struct *work) @@ -1998,14 +2001,14 @@ static const struct cont_t wakeup_cont = { .interrupt = empty, .redo = do_wakeup, .error = empty, - .done = (done_f)empty + .done = empty_done, }; static const struct cont_t intr_cont = { .interrupt = empty, .redo = process_fd_request, .error = empty, - .done = (done_f)empty + .done = empty_done, }; /* schedules handler, waiting for completion. May be interrupted, will then -- cgit From fe0b1e9a73d60f01fdc391925be74e823af7c91d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:03:01 +0100 Subject: drbd: fix function cast warnings in state machine There are four state machines in drbd that use a common infrastructure, with a cast to an incompatible function type in REMEMBER_STATE_CHANGE that clang-16 now warns about: drivers/block/drbd/drbd_state.c:1632:3: error: cast from 'int (*)(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type)' to 'typeof (last_func)' (aka 'int (*)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1632 | REMEMBER_STATE_CHANGE(notify_resource_state_change, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1633 | resource_state_change, NOTIFY_CHANGE); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/block/drbd/drbd_state.c:1619:17: note: expanded from macro 'REMEMBER_STATE_CHANGE' 1619 | last_func = (typeof(last_func))func; \ | ^~~~~~~~~~~~~~~~~~~~~~~ drivers/block/drbd/drbd_state.c:1641:4: error: cast from 'int (*)(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type)' to 'typeof (last_func)' (aka 'int (*)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1641 | REMEMBER_STATE_CHANGE(notify_connection_state_change, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1642 | connection_state_change, NOTIFY_CHANGE); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change these all to actually expect a void pointer to be passed, which matches the caller. 
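The idea is straightforward to demonstrate outside the kernel. A compilable userspace analogue (hypothetical types and names) of moving the cast from the assignment into the callee:

#include <stdio.h>

struct device_change { int old_state, new_state; };
struct connection_change { int old_cstate, new_cstate; };

/* Uniform signature: every handler takes a void pointer. */
typedef int (*notify_fn)(void *change);

static int notify_device(void *change)
{
	struct device_change *c = change;	/* cast moved into the callee */

	printf("device: %d -> %d\n", c->old_state, c->new_state);
	return 0;
}

static int notify_connection(void *change)
{
	struct connection_change *c = change;

	printf("connection: %d -> %d\n", c->old_cstate, c->new_cstate);
	return 0;
}

int main(void)
{
	struct device_change dc = { 1, 2 };
	struct connection_change cc = { 3, 4 };
	notify_fn last_func;

	last_func = notify_device;	/* type-correct: no cast, kCFI-safe */
	last_func(&dc);
	last_func = notify_connection;
	last_func(&cc);
	return 0;
}
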
Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100354.457128-1-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_state.c | 24 ++++++++++++++---------- drivers/block/drbd/drbd_state_change.h | 8 ++++---- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 287a8d1d3f70..e858e7e0383f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_resource_state_change *resource_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_resource_state_change *resource_state_change = state_change; struct drbd_resource *resource = resource_state_change->resource; struct resource_info resource_info = { .res_role = resource_state_change->role[NEW], @@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb, int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_connection_state_change *connection_state_change, + void *state_change, enum drbd_notification_type type) { - struct drbd_connection *connection = connection_state_change->connection; + struct drbd_connection_state_change *p = state_change; + struct drbd_connection *connection = p->connection; struct connection_info connection_info = { - .conn_connection_state = connection_state_change->cstate[NEW], - .conn_role = connection_state_change->peer_role[NEW], + .conn_connection_state = p->cstate[NEW], + .conn_role = p->peer_role[NEW], }; return notify_connection_state(skb, seq, connection, &connection_info, type); @@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb, int notify_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_device_state_change *device_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_device_state_change *device_state_change = state_change; struct drbd_device *device = device_state_change->device; struct device_info device_info = { .dev_disk_state = device_state_change->disk_state[NEW], @@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb, int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_peer_device_state_change *p, + void *state_change, enum drbd_notification_type type) { + struct drbd_peer_device_state_change *p = state_change; struct drbd_peer_device *peer_device = p->peer_device; struct peer_device_info peer_device_info = { .peer_repl_state = p->repl_state[NEW], @@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - int (*last_func)(struct sk_buff *, unsigned int, void *, - enum drbd_notification_type) = NULL; + int (*last_func)(struct sk_buff *, unsigned int, + void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; #define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW]) @@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) }) #define REMEMBER_STATE_CHANGE(func, arg, type) \ ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \ - last_func = (typeof(last_func))func; \ + last_func = func; 
\ last_arg = arg; \ }) diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index 9d78d8e3912e..a56a57d67686 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *); extern int notify_resource_state_change(struct sk_buff *, unsigned int, - struct drbd_resource_state_change *, + void *, enum drbd_notification_type type); extern int notify_connection_state_change(struct sk_buff *, unsigned int, - struct drbd_connection_state_change *, + void *, enum drbd_notification_type type); extern int notify_device_state_change(struct sk_buff *, unsigned int, - struct drbd_device_state_change *, + void *, enum drbd_notification_type type); extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, - struct drbd_peer_device_state_change *, + void *, enum drbd_notification_type type); #endif /* DRBD_STATE_CHANGE_H */ -- cgit From 27e32cd23fed1ab88098897897dcb9ec2bdba4de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:20 +0100 Subject: block: pass a queue_limits argument to blk_mq_alloc_disk Pass a queue_limits to blk_mq_alloc_disk and apply it if non-NULL. This will allow allocating queues with valid queue limits instead of setting the values one at a time later. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: John Garry Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-11-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 2 +- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/ataflop.c | 2 +- drivers/block/floppy.c | 2 +- drivers/block/loop.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/nbd.c | 2 +- drivers/block/null_blk/main.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/rbd.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 2 +- drivers/block/sunvdc.c | 2 +- drivers/block/swim.c | 2 +- drivers/block/swim3.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/block/z2ram.c | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 2b98114a9fe0..a25414228e47 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1779,7 +1779,7 @@ static int fd_alloc_disk(int drive, int system) struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index b1b47d88f5db..2ff6e2da8cc4 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -371,7 +371,7 @@ aoeblk_gdalloc(void *vp) goto err_mempool; } - gd = blk_mq_alloc_disk(set, d); + gd = blk_mq_alloc_disk(set, NULL, d); if (IS_ERR(gd)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 50949207798d..cacc4ba942a8 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1994,7 +1994,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + disk = 
blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2ba0ba135951..582cf50c6bf6 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4518,7 +4518,7 @@ static int floppy_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - disk = blk_mq_alloc_disk(&tag_sets[drive], NULL); + disk = blk_mq_alloc_disk(&tag_sets[drive], NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f8145499da38..3f855cc79c29 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2025,7 +2025,7 @@ static int loop_add(int i) if (err) goto out_free_idr; - disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, NULL, lo); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index b200950e8fb5..ac08dea73552 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3431,7 +3431,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto block_queue_alloc_tag_error; } - dd->disk = blk_mq_alloc_disk(&dd->tags, dd); + dd->disk = blk_mq_alloc_disk(&dd->tags, NULL, dd); if (IS_ERR(dd->disk)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 33a8f37bb6a1..30ae3cc12e77 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1823,7 +1823,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) if (err < 0) goto out_free_tags; - disk = blk_mq_alloc_disk(&nbd->tag_set, NULL); + disk = blk_mq_alloc_disk(&nbd->tag_set, NULL, NULL); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_idr; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 4281371c81fe..eeb895ec6f34 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2147,7 +2147,7 @@ static int null_add_dev(struct nullb_device *dev) goto out_cleanup_queues; nullb->tag_set->timeout = 5 * HZ; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); goto out_cleanup_tags; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 36d7b36c60c7..dfd3860df4f8 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -431,7 +431,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) if (error) goto fail_teardown; - gendisk = blk_mq_alloc_disk(&priv->tag_set, dev); + gendisk = blk_mq_alloc_disk(&priv->tag_set, NULL, dev); if (IS_ERR(gendisk)) { dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n", __func__, __LINE__); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 00ca8a1d8c46..6b4f1898a722 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4966,7 +4966,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (err) return err; - disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev); + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, NULL, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 4044c369d22a..d51be4f2df61 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1408,7 +1408,7 @@ static int rnbd_client_setup_device(struct 
rnbd_clt_dev *dev, dev->size = le64_to_cpu(rsp->nsectors) * le16_to_cpu(rsp->logical_block_size); - dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev); + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, NULL, dev); if (IS_ERR(dev->gd)) return PTR_ERR(dev->gd); dev->queue = dev->gd->queue; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 7bf4b48e2282..a1f74dd1eae5 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -824,7 +824,7 @@ static int probe_disk(struct vdc_port *port) if (err) return err; - g = blk_mq_alloc_disk(&port->tag_set, port); + g = blk_mq_alloc_disk(&port->tag_set, NULL, port); if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); diff --git a/drivers/block/swim.c b/drivers/block/swim.c index f85b6af414b4..16bdf62067d8 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -820,7 +820,7 @@ static int swim_floppy_init(struct swim_priv *swd) goto exit_put_disks; swd->unit[drive].disk = - blk_mq_alloc_disk(&swd->unit[drive].tag_set, + blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL, &swd->unit[drive]); if (IS_ERR(swd->unit[drive].disk)) { blk_mq_free_tag_set(&swd->unit[drive].tag_set); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c2bc85826358..a04756ac778e 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1210,7 +1210,7 @@ static int swim3_attach(struct macio_dev *mdev, if (rc) goto out_unregister; - disk = blk_mq_alloc_disk(&fs->tag_set, fs); + disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs); if (IS_ERR(disk)) { rc = PTR_ERR(disk); goto out_free_tag_set; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1dfb2e77898b..c5b655270798 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2222,7 +2222,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, NULL); + disk = blk_mq_alloc_disk(&ub->tag_set, NULL, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5bf98fd6a651..a23fce4eca44 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1330,7 +1330,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_vq; - vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); if (IS_ERR(vblk->disk)) { err = PTR_ERR(vblk->disk); goto out_free_tags; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 434fab306777..4cc2884e7484 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1136,7 +1136,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (err) goto out_release_minors; - gd = blk_mq_alloc_disk(&info->tag_set, info); + gd = blk_mq_alloc_disk(&info->tag_set, NULL, info); if (IS_ERR(gd)) { err = PTR_ERR(gd); goto out_free_tag_set; diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 11493167b0a8..7c5f4e4d9b50 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -318,7 +318,7 @@ static int z2ram_register_disk(int minor) struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&tag_set, NULL); + disk = blk_mq_alloc_disk(&tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); -- cgit From 718628adfcfdc80466eb42cd9c615d1d5514f74c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:21 +0100 Subject: virtio_blk: split virtblk_probe 
Split out a virtblk_read_limits helper that just reads the various queue limits to separate it from the higher level probing logic. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Stefan Hajnoczi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-12-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 193 ++++++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 92 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a23fce4eca44..dd46ccd9f84c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1248,31 +1248,17 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); -static int virtblk_probe(struct virtio_device *vdev) +static int virtblk_read_limits(struct virtio_blk *vblk) { - struct virtio_blk *vblk; - struct request_queue *q; - int err, index; - + struct request_queue *q = vblk->disk->queue; + struct virtio_device *vdev = vblk->vdev; u32 v, blk_size, max_size, sg_elems, opt_io_size; u32 max_discard_segs = 0; u32 discard_granularity = 0; u16 min_io_size; u8 physical_block_exp, alignment_offset; - unsigned int queue_depth; size_t max_dma_size; - - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - - err = ida_alloc_range(&vd_index_ida, 0, - minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); - if (err < 0) - goto out; - index = err; + int err; /* We need to know how many segments before we allocate. */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, @@ -1286,73 +1272,6 @@ static int virtblk_probe(struct virtio_device *vdev) /* Prevent integer overflows and honor max vq size */ sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); - vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); - if (!vblk) { - err = -ENOMEM; - goto out_free_index; - } - - mutex_init(&vblk->vdev_mutex); - - vblk->vdev = vdev; - - INIT_WORK(&vblk->config_work, virtblk_config_changed_work); - - err = init_vq(vblk); - if (err) - goto out_free_vblk; - - /* Default queue sizing is to fill the ring. */ - if (!virtblk_queue_depth) { - queue_depth = vblk->vqs[0].vq->num_free; - /* ... 
but without indirect descs, we use 2 descs per req */ - if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) - queue_depth /= 2; - } else { - queue_depth = virtblk_queue_depth; - } - - memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); - vblk->tag_set.ops = &virtio_mq_ops; - vblk->tag_set.queue_depth = queue_depth; - vblk->tag_set.numa_node = NUMA_NO_NODE; - vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - vblk->tag_set.cmd_size = - sizeof(struct virtblk_req) + - sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; - vblk->tag_set.driver_data = vblk; - vblk->tag_set.nr_hw_queues = vblk->num_vqs; - vblk->tag_set.nr_maps = 1; - if (vblk->io_queues[HCTX_TYPE_POLL]) - vblk->tag_set.nr_maps = 3; - - err = blk_mq_alloc_tag_set(&vblk->tag_set); - if (err) - goto out_free_vq; - - vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); - if (IS_ERR(vblk->disk)) { - err = PTR_ERR(vblk->disk); - goto out_free_tags; - } - q = vblk->disk->queue; - - virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); - - vblk->disk->major = major; - vblk->disk->first_minor = index_to_minor(index); - vblk->disk->minors = 1 << PART_BITS; - vblk->disk->private_data = vblk; - vblk->disk->fops = &virtblk_fops; - vblk->index = index; - - /* configure queue flush support */ - virtblk_update_cache_mode(vdev); - - /* If disk is read-only in the host, the guest should obey */ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) - set_disk_ro(vblk->disk, 1); - /* We can handle whatever the host told us to handle. */ blk_queue_max_segments(q, sg_elems); @@ -1381,7 +1300,7 @@ static int virtblk_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "virtio_blk: invalid block size: 0x%x\n", blk_size); - goto out_cleanup_disk; + return err; } blk_queue_logical_block_size(q, blk_size); @@ -1455,8 +1374,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: secure_erase_sector_alignment can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } discard_granularity = min_not_zero(discard_granularity, v); @@ -1470,8 +1388,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: max_secure_erase_sectors can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } blk_queue_max_secure_erase_sectors(q, v); @@ -1485,8 +1402,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: max_secure_erase_seg can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } max_discard_segs = min_not_zero(max_discard_segs, v); @@ -1511,6 +1427,99 @@ static int virtblk_probe(struct virtio_device *vdev) q->limits.discard_granularity = blk_size; } + return 0; +} + +static int virtblk_probe(struct virtio_device *vdev) +{ + struct virtio_blk *vblk; + struct request_queue *q; + int err, index; + unsigned int queue_depth; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + err = ida_alloc_range(&vd_index_ida, 0, + minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); + if (err < 0) + goto out; + index = err; + + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); + if (!vblk) { + err = -ENOMEM; + goto out_free_index; + } + + mutex_init(&vblk->vdev_mutex); + + vblk->vdev = vdev; + + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); + + err = init_vq(vblk); + if (err) + goto out_free_vblk; + + /* Default queue sizing is to fill the ring. 
*/ + if (!virtblk_queue_depth) { + queue_depth = vblk->vqs[0].vq->num_free; + /* ... but without indirect descs, we use 2 descs per req */ + if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) + queue_depth /= 2; + } else { + queue_depth = virtblk_queue_depth; + } + + memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); + vblk->tag_set.ops = &virtio_mq_ops; + vblk->tag_set.queue_depth = queue_depth; + vblk->tag_set.numa_node = NUMA_NO_NODE; + vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + vblk->tag_set.cmd_size = + sizeof(struct virtblk_req) + + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; + vblk->tag_set.driver_data = vblk; + vblk->tag_set.nr_hw_queues = vblk->num_vqs; + vblk->tag_set.nr_maps = 1; + if (vblk->io_queues[HCTX_TYPE_POLL]) + vblk->tag_set.nr_maps = 3; + + err = blk_mq_alloc_tag_set(&vblk->tag_set); + if (err) + goto out_free_vq; + + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); + if (IS_ERR(vblk->disk)) { + err = PTR_ERR(vblk->disk); + goto out_free_tags; + } + q = vblk->disk->queue; + + virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); + + vblk->disk->major = major; + vblk->disk->first_minor = index_to_minor(index); + vblk->disk->minors = 1 << PART_BITS; + vblk->disk->private_data = vblk; + vblk->disk->fops = &virtblk_fops; + vblk->index = index; + + /* configure queue flush support */ + virtblk_update_cache_mode(vdev); + + /* If disk is read-only in the host, the guest should obey */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) + set_disk_ro(vblk->disk, 1); + + err = virtblk_read_limits(vblk); + if (err) + goto out_cleanup_disk; + virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); -- cgit From 8b837256560c783415b42748959900befcde2d00 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:22 +0100 Subject: virtio_blk: pass queue_limits to blk_mq_alloc_disk Call virtblk_read_limits and most of virtblk_probe_zoned_device before allocating the gendisk and thus request_queue and make them read into a queue_limits structure instead. Pass this initialized queue_limits to blk_mq_alloc_disk to set the queue up with the right parameters from the start and only leave a few final touches for zoned devices to be done just before adding the disk. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Stefan Hajnoczi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-13-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 130 ++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 66 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd46ccd9f84c..d8b55874cd59 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -720,16 +720,15 @@ fail_report: return ret; } -static int virtblk_probe_zoned_device(struct virtio_device *vdev, - struct virtio_blk *vblk, - struct request_queue *q) +static int virtblk_read_zoned_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { + struct virtio_device *vdev = vblk->vdev; u32 v, wg; dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - disk_set_zoned(vblk->disk); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); + lim->zoned = true; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); @@ -747,8 +746,8 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_warn(&vdev->dev, "zero write granularity reported\n"); return -ENODEV; } - blk_queue_physical_block_size(q, wg); - blk_queue_io_min(q, wg); + lim->physical_block_size = wg; + lim->io_min = wg; dev_dbg(&vdev->dev, "write granularity = %u\n", wg); @@ -764,13 +763,13 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, vblk->zone_sectors); return -ENODEV; } - blk_queue_chunk_sectors(q, vblk->zone_sectors); + lim->chunk_sectors = vblk->zone_sectors; dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { dev_warn(&vblk->vdev->dev, "ignoring negotiated F_DISCARD for zoned device\n"); - blk_queue_max_discard_sectors(q, 0); + lim->max_hw_discard_sectors = 0; } virtio_cread(vdev, struct virtio_blk_config, @@ -785,25 +784,21 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, wg, v); return -ENODEV; } - blk_queue_max_zone_append_sectors(q, v); + lim->max_zone_append_sectors = v; dev_dbg(&vdev->dev, "max append sectors = %u\n", v); - return blk_revalidate_disk_zones(vblk->disk, NULL); + return 0; } - #else - /* - * Zoned block device support is not configured in this kernel. - * Host-managed zoned devices can't be supported, but others are - * good to go as regular block devices. + * Zoned block device support is not configured in this kernel, host-managed + * zoned devices can't be supported. 
*/ #define virtblk_report_zones NULL - -static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, - struct virtio_blk *vblk, struct request_queue *q) +static inline int virtblk_read_zoned_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { - dev_err(&vdev->dev, + dev_err(&vblk->vdev->dev, "virtio_blk: zoned devices are not supported"); return -EOPNOTSUPP; } @@ -1248,9 +1243,9 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); -static int virtblk_read_limits(struct virtio_blk *vblk) +static int virtblk_read_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { - struct request_queue *q = vblk->disk->queue; struct virtio_device *vdev = vblk->vdev; u32 v, blk_size, max_size, sg_elems, opt_io_size; u32 max_discard_segs = 0; @@ -1273,10 +1268,10 @@ static int virtblk_read_limits(struct virtio_blk *vblk) sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); /* We can handle whatever the host told us to handle. */ - blk_queue_max_segments(q, sg_elems); + lim->max_segments = sg_elems; /* No real sector limit. */ - blk_queue_max_hw_sectors(q, UINT_MAX); + lim->max_hw_sectors = UINT_MAX; max_dma_size = virtio_max_dma_size(vdev); max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size; @@ -1288,7 +1283,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) if (!err) max_size = min(max_size, v); - blk_queue_max_segment_size(q, max_size); + lim->max_segment_size = max_size; /* Host can optionally specify the block size of the device */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, @@ -1303,35 +1298,34 @@ static int virtblk_read_limits(struct virtio_blk *vblk) return err; } - blk_queue_logical_block_size(q, blk_size); + lim->logical_block_size = blk_size; } else - blk_size = queue_logical_block_size(q); + blk_size = lim->logical_block_size; /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, &physical_block_exp); if (!err && physical_block_exp) - blk_queue_physical_block_size(q, - blk_size * (1 << physical_block_exp)); + lim->physical_block_size = blk_size * (1 << physical_block_exp); err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, alignment_offset, &alignment_offset); if (!err && alignment_offset) - blk_queue_alignment_offset(q, blk_size * alignment_offset); + lim->alignment_offset = blk_size * alignment_offset; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, min_io_size, &min_io_size); if (!err && min_io_size) - blk_queue_io_min(q, blk_size * min_io_size); + lim->io_min = blk_size * min_io_size; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, opt_io_size, &opt_io_size); if (!err && opt_io_size) - blk_queue_io_opt(q, blk_size * opt_io_size); + lim->io_opt = blk_size * opt_io_size; if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { virtio_cread(vdev, struct virtio_blk_config, @@ -1339,7 +1333,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); - blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); + lim->max_hw_discard_sectors = v ? 
v : UINT_MAX; virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &max_discard_segs); @@ -1348,7 +1342,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { virtio_cread(vdev, struct virtio_blk_config, max_write_zeroes_sectors, &v); - blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); + lim->max_write_zeroes_sectors = v ? v : UINT_MAX; } /* The discard and secure erase limits are combined since the Linux @@ -1391,7 +1385,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) return -EINVAL; } - blk_queue_max_secure_erase_sectors(q, v); + lim->max_secure_erase_sectors = v; virtio_cread(vdev, struct virtio_blk_config, max_secure_erase_seg, &v); @@ -1418,13 +1412,34 @@ static int virtblk_read_limits(struct virtio_blk *vblk) if (!max_discard_segs) max_discard_segs = sg_elems; - blk_queue_max_discard_segments(q, - min(max_discard_segs, MAX_DISCARD_SEGMENTS)); + lim->max_discard_segments = + min(max_discard_segs, MAX_DISCARD_SEGMENTS); if (discard_granularity) - q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT; + lim->discard_granularity = + discard_granularity << SECTOR_SHIFT; else - q->limits.discard_granularity = blk_size; + lim->discard_granularity = blk_size; + } + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { + u8 model; + + virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model); + switch (model) { + case VIRTIO_BLK_Z_NONE: + case VIRTIO_BLK_Z_HA: + /* treat host-aware devices as non-zoned */ + return 0; + case VIRTIO_BLK_Z_HM: + err = virtblk_read_zoned_limits(vblk, lim); + if (err) + return err; + break; + default: + dev_err(&vdev->dev, "unsupported zone model %d\n", model); + return -EINVAL; + } } return 0; @@ -1433,7 +1448,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk) static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; - struct request_queue *q; + struct queue_limits lim = { }; int err, index; unsigned int queue_depth; @@ -1493,12 +1508,15 @@ static int virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_vq; - vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, NULL, vblk); + err = virtblk_read_limits(vblk, &lim); + if (err) + goto out_free_tags; + + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk); if (IS_ERR(vblk->disk)) { err = PTR_ERR(vblk->disk); goto out_free_tags; } - q = vblk->disk->queue; virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); @@ -1516,10 +1534,6 @@ static int virtblk_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) set_disk_ro(vblk->disk, 1); - err = virtblk_read_limits(vblk); - if (err) - goto out_cleanup_disk; - virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); @@ -1527,27 +1541,11 @@ static int virtblk_probe(struct virtio_device *vdev) * All steps that follow use the VQs therefore they need to be * placed after the virtio_device_ready() call above. 
*/ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { - u8 model; - - virtio_cread(vdev, struct virtio_blk_config, zoned.model, - &model); - switch (model) { - case VIRTIO_BLK_Z_NONE: - case VIRTIO_BLK_Z_HA: - /* Present the host-aware device as non-zoned */ - break; - case VIRTIO_BLK_Z_HM: - err = virtblk_probe_zoned_device(vdev, vblk, q); - if (err) - goto out_cleanup_disk; - break; - default: - dev_err(&vdev->dev, "unsupported zone model %d\n", - model); - err = -EINVAL; + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); + err = blk_revalidate_disk_zones(vblk->disk, NULL); + if (err) goto out_cleanup_disk; - } } err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); -- cgit From 65bdd16f8c72bb2178f5e4db40305bef6c96b309 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:23 +0100 Subject: loop: cleanup loop_config_discard Initialize the local variables for the discard max sectors and granularity to zero as a sensible default, and then merge the calls assigning them to the queue limits. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-14-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3f855cc79c29..7abeb5869426 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -755,7 +755,8 @@ static void loop_config_discard(struct loop_device *lo) struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; - u32 granularity, max_discard_sectors; + u32 granularity = 0, max_discard_sectors = 0; + struct kstatfs sbuf; /* * If the backing device is a block device, mirror its zeroing @@ -775,29 +776,17 @@ static void loop_config_discard(struct loop_device *lo) * We use punch hole to reclaim the free space used by the * image a.k.a. discard. */ - } else if (!file->f_op->fallocate) { - max_discard_sectors = 0; - granularity = 0; - - } else { - struct kstatfs sbuf; - + } else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) { max_discard_sectors = UINT_MAX >> 9; - if (!vfs_statfs(&file->f_path, &sbuf)) - granularity = sbuf.f_bsize; - else - max_discard_sectors = 0; + granularity = sbuf.f_bsize; } - if (max_discard_sectors) { + blk_queue_max_discard_sectors(q, max_discard_sectors); + blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + if (max_discard_sectors) q->limits.discard_granularity = granularity; - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - } else { + else q->limits.discard_granularity = 0; - blk_queue_max_discard_sectors(q, 0); - blk_queue_max_write_zeroes_sectors(q, 0); - } } struct loop_worker { -- cgit From 02aed4a1f2c355e41d82a8e9831031ca9e0eb45d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:24 +0100 Subject: loop: pass queue_limits to blk_mq_alloc_disk Pass the max_hw_sector limit loop sets at initialization time directly to blk_mq_alloc_disk instead of updating it right after the allocation. 
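In shorthand, the conversion has this shape (a sketch of the loop_add() change, abbreviated from the patch below):

struct queue_limits lim = {
	/* random number picked from the historic block max_sectors cap */
	.max_hw_sectors = 2560u,
};

disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo);
if (IS_ERR(disk)) {
	err = PTR_ERR(disk);
	goto out_cleanup_tags;
}
/* the former blk_queue_max_hw_sectors(lo->lo_queue, 2560u) call goes away */
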
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-15-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7abeb5869426..26c8ea790867 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1971,6 +1971,12 @@ static const struct blk_mq_ops loop_mq_ops = { static int loop_add(int i) { + struct queue_limits lim = { + /* + * Random number picked from the historic block max_sectors cap. + */ + .max_hw_sectors = 2560u, + }; struct loop_device *lo; struct gendisk *disk; int err; @@ -2014,16 +2020,13 @@ static int loop_add(int i) if (err) goto out_free_idr; - disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, NULL, lo); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; } lo->lo_queue = lo->lo_disk->queue; - /* random number picked from the history block max_sectors cap */ - blk_queue_max_hw_sectors(lo->lo_queue, 2560u); - /* * By default, we do buffer IO, so it doesn't make sense to enable * merge because the I/O submitted to backing file is handled page by -- cgit From 473516b361936cbc27d7728df649a5b3094b6170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Feb 2024 08:34:25 +0100 Subject: loop: use the atomic queue limits update API Pass the default limits to blk_mq_alloc_disk and then use the queue_limits_{start,commit}_update API to change the limits in an atomic way on existing loop gendisks. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240213073425.1621680-16-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 26c8ea790867..28a95fd366fe 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -750,11 +750,11 @@ static void loop_sysfs_exit(struct loop_device *lo) &loop_attribute_group); } -static void loop_config_discard(struct loop_device *lo) +static void loop_config_discard(struct loop_device *lo, + struct queue_limits *lim) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; - struct request_queue *q = lo->lo_queue; u32 granularity = 0, max_discard_sectors = 0; struct kstatfs sbuf; @@ -781,12 +781,12 @@ static void loop_config_discard(struct loop_device *lo) granularity = sbuf.f_bsize; } - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); + lim->max_hw_discard_sectors = max_discard_sectors; + lim->max_write_zeroes_sectors = max_discard_sectors; if (max_discard_sectors) - q->limits.discard_granularity = granularity; + lim->discard_granularity = granularity; else - q->limits.discard_granularity = 0; + lim->discard_granularity = 0; } struct loop_worker { @@ -975,6 +975,20 @@ loop_set_status_from_info(struct loop_device *lo, return 0; } +static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize, + bool update_discard_settings) +{ + struct queue_limits lim; + + lim = queue_limits_start_update(lo->lo_queue); + lim.logical_block_size = bsize; + lim.physical_block_size = bsize; + lim.io_min = bsize; + if (update_discard_settings) + loop_config_discard(lo, &lim); + return queue_limits_commit_update(lo->lo_queue, &lim); +} + static int loop_configure(struct loop_device *lo, blk_mode_t mode, struct block_device *bdev, const struct loop_config *config) @@ -1072,11 +1086,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, else bsize = 512; - blk_queue_logical_block_size(lo->lo_queue, bsize); - blk_queue_physical_block_size(lo->lo_queue, bsize); - blk_queue_io_min(lo->lo_queue, bsize); + error = loop_reconfigure_limits(lo, bsize, true); + if (WARN_ON_ONCE(error)) + goto out_unlock; - loop_config_discard(lo); loop_update_rotational(lo); loop_update_dio(lo); loop_sysfs_init(lo); @@ -1143,9 +1156,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) lo->lo_offset = 0; lo->lo_sizelimit = 0; memset(lo->lo_file_name, 0, LO_NAME_SIZE); - blk_queue_logical_block_size(lo->lo_queue, 512); - blk_queue_physical_block_size(lo->lo_queue, 512); - blk_queue_io_min(lo->lo_queue, 512); + loop_reconfigure_limits(lo, 512, false); invalidate_disk(lo->lo_disk); loop_sysfs_exit(lo); /* let user-space know about this change */ @@ -1477,9 +1488,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) invalidate_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); - blk_queue_logical_block_size(lo->lo_queue, arg); - blk_queue_physical_block_size(lo->lo_queue, arg); - blk_queue_io_min(lo->lo_queue, arg); + err = loop_reconfigure_limits(lo, arg, false); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); -- cgit From 31edf4bbe0ba27fd03ac7d87eb2ee3d2a231af6d Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 17 Feb 2024 20:25:38 -0800 Subject: nbd: null check for nla_nest_start 
nla_nest_start() may fail and return NULL. Insert a check and set errno based on other call sites within the same source code. Signed-off-by: Navid Emamdoost Reviewed-by: Michal Kubecek Fixes: 47d902b90a32 ("nbd: add a status netlink command") Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240218042534.it.206-kees@kernel.org Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 30ae3cc12e77..d2b422d842b7 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2433,6 +2433,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) } dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); + if (!dev_list) { + nlmsg_free(reply); + ret = -EMSGSIZE; + goto out; + } + if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) { -- cgit From 74fa8f9c553f7b5ccab7d103acae63cc2e080465 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:47 +0100 Subject: block: pass a queue_limits argument to blk_alloc_disk Pass a queue_limits to blk_alloc_disk and apply it if non-NULL. This will allow allocating queues with valid queue limits instead of setting the values one at a time later. Also change blk_alloc_disk to return an ERR_PTR instead of just NULL which can't distinguish errors. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/brd.c | 7 ++++--- drivers/block/drbd/drbd_main.c | 6 ++++-- drivers/block/n64cart.c | 6 ++++-- drivers/block/null_blk/main.c | 7 ++++--- drivers/block/pktcdvd.c | 7 ++++--- drivers/block/ps3vram.c | 6 +++--- drivers/block/zram/zram_drv.c | 6 +++--- 7 files changed, 26 insertions(+), 19 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 970bd6ff38c4..689a3c0c31f8 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -335,10 +335,11 @@ static int brd_alloc(int i) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = brd->brd_disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_free_dev; - + } disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; disk->minors = max_part; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6bc86106c7b2..cea1e537fd56 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2708,9 +2708,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_no_disk; + } device->vdisk = disk; device->rq_queue = disk->queue; diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index d914156db2d8..c64d7ee7a44d 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -131,9 +131,11 @@ static int __init n64cart_probe(struct platform_device *pdev) if (IS_ERR(reg_base)) return PTR_ERR(reg_base); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out; + } disk->first_minor = 0; disk->flags = GENHD_FL_NO_PART; diff --git 
a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index eeb895ec6f34..baf2b228d008 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2154,10 +2154,11 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q = nullb->disk->queue; } else if (dev->queue_mode == NULL_Q_BIO) { - rv = -ENOMEM; - nullb->disk = blk_alloc_disk(nullb->dev->home_node); - if (!nullb->disk) + nullb->disk = blk_alloc_disk(NULL, nullb->dev->home_node); + if (IS_ERR(nullb->disk)) { + rv = PTR_ERR(nullb->disk); goto out_cleanup_queues; + } nullb->q = nullb->disk->queue; rv = init_driver_queues(nullb); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d56d972aadb3..abb82926b1c9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2673,10 +2673,11 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->write_congestion_on = write_congestion_on; pd->write_congestion_off = write_congestion_off; - ret = -ENOMEM; - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); goto out_mem; + } pd->disk = disk; disk->major = pktdev_major; disk->first_minor = idx; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 38d42af01b25..bdcf083b45e2 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -730,10 +730,10 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) ps3vram_proc_init(dev); - gendisk = blk_alloc_disk(NUMA_NO_NODE); - if (!gendisk) { + gendisk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(gendisk)) { dev_err(&dev->core, "blk_alloc_disk failed\n"); - error = -ENOMEM; + error = PTR_ERR(gendisk); goto out_cache_cleanup; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6772e0c654fa..84982221fc66 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2195,11 +2195,11 @@ static int zram_add(void) #endif /* gendisk structure */ - zram->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!zram->disk) { + zram->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(zram->disk)) { pr_err("Error allocating disk structure for device %d\n", device_id); - ret = -ENOMEM; + ret = PTR_ERR(zram->disk); goto out_free_idr; } -- cgit From b5baaba4ce5c8a0e36b5232b16c0731e3eb0d939 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:49 +0100 Subject: brd: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/brd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 689a3c0c31f8..e322cef6596b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -318,6 +318,16 @@ static int brd_alloc(int i) struct gendisk *disk; char buf[DISK_NAME_LEN]; int err = -ENOMEM; + struct queue_limits lim = { + /* + * This is so fdisk will align partitions on 4k, because of + * direct_access API needing 4k alignment, returning a PFN + * (This is only a problem on very small devices <= 4M, + * otherwise fdisk will align on 1M. 
Regardless this call + * is harmless) + */ + .physical_block_size = PAGE_SIZE, + }; list_for_each_entry(brd, &brd_devices, brd_list) if (brd->brd_number == i) @@ -335,7 +345,7 @@ static int brd_alloc(int i) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - disk = brd->brd_disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_dev; @@ -348,15 +358,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - /* - * This is so fdisk will align partitions on 4k, because of - * direct_access API needing 4k alignment, returning a PFN - * (This is only a problem on very small devices <= 4M, - * otherwise fdisk will align on 1M. Regardless this call - * is harmless) - */ - blk_queue_physical_block_size(disk->queue, PAGE_SIZE); - /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); -- cgit From cc7f05c7ec0b26e1eda8ec7a99452032d08d305e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:50 +0100 Subject: n64cart: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/n64cart.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index c64d7ee7a44d..27b2187e7a6d 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -114,6 +114,10 @@ static const struct block_device_operations n64cart_fops = { */ static int __init n64cart_probe(struct platform_device *pdev) { + struct queue_limits lim = { + .physical_block_size = 4096, + .logical_block_size = 4096, + }; struct gendisk *disk; int err = -ENOMEM; @@ -131,7 +135,7 @@ static int __init n64cart_probe(struct platform_device *pdev) if (IS_ERR(reg_base)) return PTR_ERR(reg_base); - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out; @@ -147,8 +151,6 @@ static int __init n64cart_probe(struct platform_device *pdev) set_disk_ro(disk, 1); blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_physical_block_size(disk->queue, 4096); - blk_queue_logical_block_size(disk->queue, 4096); err = add_disk(disk); if (err) -- cgit From 4190b3f291d9563a438bf32424a3f049442fc3a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:10:51 +0100 Subject: zram: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_alloc_disk instead of setting them one at a time. 
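In sketch form, the allocation pattern all of these conversions share looks like the following; the foo_* names are invented for illustration and the limit shown is an arbitrary example rather than any one driver's:

#include <linux/blkdev.h>

static struct gendisk *foo_alloc_example(void)
{
        /* limits are declared up front and applied atomically at allocation */
        struct queue_limits lim = {
                .physical_block_size = PAGE_SIZE,
        };

        /* blk_alloc_disk() now takes the limits and returns an ERR_PTR on failure */
        return blk_alloc_disk(&lim, NUMA_NO_NODE);
}

Callers check the result with IS_ERR()/PTR_ERR() rather than testing for NULL, as the brd and drbd hunks above show.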
Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Reviewed-by: Dan Williams Reviewed-by: Himanshu Madhani Link: https://lore.kernel.org/r/20240215071055.2201424-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 47 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 84982221fc66..8ee0f7bef190 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2177,6 +2177,28 @@ ATTRIBUTE_GROUPS(zram_disk); */ static int zram_add(void) { + struct queue_limits lim = { + .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, + /* + * To ensure that we always get PAGE_SIZE aligned and + * n*PAGE_SIZED sized I/O requests. + */ + .physical_block_size = PAGE_SIZE, + .io_min = PAGE_SIZE, + .io_opt = PAGE_SIZE, + .max_hw_discard_sectors = UINT_MAX, + /* + * zram_bio_discard() will clear all logical blocks if logical + * block size is identical with physical block size(PAGE_SIZE). + * But if it is different, we will skip discarding some parts of + * logical blocks in the part of the request range which isn't + * aligned to physical block size. So we can't ensure that all + * discarded logical blocks are zeroed. + */ +#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE + .max_write_zeroes_sectors = UINT_MAX, +#endif + }; struct zram *zram; int ret, device_id; @@ -2195,7 +2217,7 @@ static int zram_add(void) #endif /* gendisk structure */ - zram->disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(zram->disk)) { pr_err("Error allocating disk structure for device %d\n", device_id); @@ -2216,29 +2238,6 @@ static int zram_add(void) /* zram devices sort of resembles non-rotational disks */ blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); - - /* - * To ensure that we always get PAGE_SIZE aligned - * and n*PAGE_SIZED sized I/O requests. - */ - blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); - blk_queue_logical_block_size(zram->disk->queue, - ZRAM_LOGICAL_BLOCK_SIZE); - blk_queue_io_min(zram->disk->queue, PAGE_SIZE); - blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); - blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - - /* - * zram_bio_discard() will clear all logical blocks if logical block - * size is identical with physical block size(PAGE_SIZE). But if it is - * different, we will skip discarding some parts of logical blocks in - * the part of the request range which isn't aligned to physical block - * size. So we can't ensure that all discarded logical blocks are - * zeroed. - */ - if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) - blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) -- cgit From 9999200f583107f7e244e50935d480433b7d8a3b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:45 +0100 Subject: aoe: pass queue_limits to blk_mq_alloc_disk Pass the few limits aoe imposes directly to blk_mq_alloc_disk instead of setting them one at a time and improve the way the default max_hw_sectors is initialized while we're at it. 
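As a side note on the aoe conversion: folding the historic 2560-sector cap into the module parameter's default lets one assignment replace the old two-step setup. A hedged sketch, with foo_* placeholders standing in for the driver's names:

#include <linux/blk-mq.h>
#include <linux/module.h>
#include <linux/sizes.h>

static int foo_maxsectors = 2560;       /* nonzero default replaces the old fixup */
module_param(foo_maxsectors, int, 0644);

static struct gendisk *foo_gdalloc(struct blk_mq_tag_set *set, void *ddata)
{
        struct queue_limits lim = {
                .max_hw_sectors = foo_maxsectors,       /* param is always valid now */
                .io_opt         = SZ_2M,
        };

        return blk_mq_alloc_disk(set, &lim, ddata);
}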
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 2ff6e2da8cc4..b6dac8cee70f 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -24,8 +24,8 @@ static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; static struct dentry *aoe_debugfs_dir; -/* GPFS needs a larger value than the default. */ -static int aoe_maxsectors; +/* random default picked from the historic block max_sectors cap */ +static int aoe_maxsectors = 2560; module_param(aoe_maxsectors, int, 0644); MODULE_PARM_DESC(aoe_maxsectors, "When nonzero, set the maximum number of sectors per I/O request"); @@ -334,6 +334,10 @@ aoeblk_gdalloc(void *vp) mempool_t *mp; struct blk_mq_tag_set *set; sector_t ssize; + struct queue_limits lim = { + .max_hw_sectors = aoe_maxsectors, + .io_opt = SZ_2M, + }; ulong flags; int late = 0; int err; @@ -371,7 +375,7 @@ aoeblk_gdalloc(void *vp) goto err_mempool; } - gd = blk_mq_alloc_disk(set, NULL, d); + gd = blk_mq_alloc_disk(set, &lim, d); if (IS_ERR(gd)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); @@ -384,14 +388,9 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->flags & DEVFL_TKILL); WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); - /* random number picked from the history block max_sectors cap */ - blk_queue_max_hw_sectors(gd->queue, 2560u); - blk_queue_io_opt(gd->queue, SZ_2M); d->bufpool = mp; d->blkq = gd->queue; d->gd = gd; - if (aoe_maxsectors) - blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; gd->minors = AOE_PARTITIONS; -- cgit From 48bc8c7ba6fb39a4325b07f3abe8fe5a77361c7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:46 +0100 Subject: floppy: pass queue_limits to blk_mq_alloc_disk Pass the few limits floppy imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Reviewed-by: Denis Efremov Link: https://lore.kernel.org/r/20240215070300.2200308-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 582cf50c6bf6..1b399ec8c07d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4516,13 +4516,15 @@ static bool floppy_available(int drive) static int floppy_alloc_disk(unsigned int drive, unsigned int type) { + struct queue_limits lim = { + .max_hw_sectors = 64, + }; struct gendisk *disk; - disk = blk_mq_alloc_disk(&tag_sets[drive], NULL, NULL); + disk = blk_mq_alloc_disk(&tag_sets[drive], &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); - blk_queue_max_hw_sectors(disk->queue, 64); disk->major = FLOPPY_MAJOR; disk->first_minor = TOMINOR(drive) | (type << 2); disk->minors = 1; -- cgit From 68c3135fb5fbd85c7b2ca851184f30f54433a9d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:47 +0100 Subject: mtip: pass queue_limits to blk_mq_alloc_disk Pass the few limits mtip imposes directly to blk_mq_alloc_disk instead of setting them one at a time and drop the pointless setting of an io_min that is equal to the physical block size. 
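Dropping io_min relies on the block layer deriving the minimum I/O size from the physical block size when no explicit io_min is given; that assumption, plus made-up foo_* names, is all this sketch adds over the patch itself:

#include <linux/blk-mq.h>
#include <linux/dma-mapping.h>

/* hypothetical PCI-driver fragment; 'dev' is the device doing the DMA */
static void foo_setup_limits(struct device *dev, struct queue_limits *lim)
{
        lim->physical_block_size = 4096;        /* io_min is derived from this */
        lim->max_hw_sectors = 0xffff;
        lim->max_segment_size = 0x400000;

        /* keep the DMA-mapping cap in sync with the block-layer segment cap */
        dma_set_max_seg_size(dev, lim->max_segment_size);
}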
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ac08dea73552..43a187609ef7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3401,6 +3401,12 @@ static const struct blk_mq_ops mtip_mq_ops = { */ static int mtip_block_initialize(struct driver_data *dd) { + struct queue_limits lim = { + .physical_block_size = 4096, + .max_hw_sectors = 0xffff, + .max_segments = MTIP_MAX_SG, + .max_segment_size = 0x400000, + }; int rv = 0, wait_for_rebuild = 0; sector_t capacity; unsigned int index = 0; @@ -3431,7 +3437,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto block_queue_alloc_tag_error; } - dd->disk = blk_mq_alloc_disk(&dd->tags, NULL, dd); + dd->disk = blk_mq_alloc_disk(&dd->tags, &lim, dd); if (IS_ERR(dd->disk)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3481,12 +3487,7 @@ skip_create_disk: /* Set device limits. */ blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); - blk_queue_max_segments(dd->queue, MTIP_MAX_SG); - blk_queue_physical_block_size(dd->queue, 4096); - blk_queue_max_hw_sectors(dd->queue, 0xffff); - blk_queue_max_segment_size(dd->queue, 0x400000); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); - blk_queue_io_min(dd->queue, 4096); /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { -- cgit From 9a0d4970288de29191fa45bf0ab4d8398bfa3a01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:48 +0100 Subject: nbd: pass queue_limits to blk_mq_alloc_disk Pass the few limits nbd imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
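One detail worth calling out in the nbd conversion below: the old code poked q->limits.max_sectors directly to get a 256-sector default under a 65536-sector hardware ceiling, and the max_user_sectors field now expresses that split. A minimal sketch with invented foo_* names:

#include <linux/blk-mq.h>

static struct gendisk *foo_add_disk(struct blk_mq_tag_set *set)
{
        struct queue_limits lim = {
                .max_hw_sectors   = 65536,      /* hard ceiling */
                .max_user_sectors = 256,        /* default request size, user-adjustable */
                .max_segments     = USHRT_MAX,
                .max_segment_size = UINT_MAX,
        };

        return blk_mq_alloc_disk(set, &lim, NULL);
}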
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d2b422d842b7..9ee9587375fa 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1783,6 +1783,12 @@ static const struct blk_mq_ops nbd_mq_ops = { static struct nbd_device *nbd_dev_add(int index, unsigned int refs) { + struct queue_limits lim = { + .max_hw_sectors = 65536, + .max_user_sectors = 256, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; struct nbd_device *nbd; struct gendisk *disk; int err = -ENOMEM; @@ -1823,7 +1829,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) if (err < 0) goto out_free_tags; - disk = blk_mq_alloc_disk(&nbd->tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&nbd->tag_set, &lim, NULL); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_idr; @@ -1843,11 +1849,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) * Tell the block layer that we are not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_max_discard_sectors(disk->queue, 0); - blk_queue_max_segment_size(disk->queue, UINT_MAX); - blk_queue_max_segments(disk->queue, USHRT_MAX); - blk_queue_max_hw_sectors(disk->queue, 65536); - disk->queue->limits.max_sectors = 256; mutex_init(&nbd->config_lock); refcount_set(&nbd->config_refs, 0); -- cgit From a7f18b74dbe171625afc2751942a92f71a4dd4ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:49 +0100 Subject: ps3disk: pass queue_limits to blk_mq_alloc_disk Pass the few limits ps3disk imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
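For drivers whose bounce buffer and block size dictate the limits, the pattern is to compute them from probed device data; a sketch under the assumption that blk_size and bounce_size come from the device (names are placeholders):

#include <linux/blkdev.h>

static void foo_fill_limits(struct queue_limits *lim,
                            unsigned int blk_size, unsigned int bounce_size)
{
        lim->logical_block_size = blk_size;
        lim->max_hw_sectors = bounce_size >> 9;  /* bytes to 512-byte sectors */
        lim->max_segment_size = bounce_size;
        lim->dma_alignment = blk_size - 1;       /* mask form: block-aligned buffers */
}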
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index dfd3860df4f8..b810ac0a5c4b 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -382,6 +382,14 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv; int error; unsigned int devidx; + struct queue_limits lim = { + .logical_block_size = dev->blk_size, + .max_hw_sectors = dev->bounce_size >> 9, + .max_segments = -1, + .max_segment_size = dev->bounce_size, + .dma_alignment = dev->blk_size - 1, + }; + struct request_queue *queue; struct gendisk *gendisk; @@ -431,7 +439,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) if (error) goto fail_teardown; - gendisk = blk_mq_alloc_disk(&priv->tag_set, NULL, dev); + gendisk = blk_mq_alloc_disk(&priv->tag_set, &lim, dev); if (IS_ERR(gendisk)) { dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n", __func__, __LINE__); @@ -441,15 +449,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) queue = gendisk->queue; - blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); - blk_queue_dma_alignment(queue, dev->blk_size-1); - blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_write_cache(queue, true, false); - blk_queue_max_segments(queue, -1); - blk_queue_max_segment_size(queue, dev->bounce_size); - priv->gendisk = gendisk; gendisk->major = ps3disk_major; gendisk->first_minor = devidx * PS3DISK_MINORS; -- cgit From 24f30b770c0f450346f1c99120427b2e938cdfd0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:50 +0100 Subject: rbd: pass queue_limits to blk_mq_alloc_disk Pass the limits rbd imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
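Because the limits are now plain struct fields on the stack, optional features can be filled in before the disk exists instead of prodding the queue afterwards; roughly as follows, where foo_* and trim_enabled are illustrative rather than rbd's code:

#include <linux/blk-mq.h>

static struct gendisk *foo_alloc(struct blk_mq_tag_set *set, void *data,
                                 unsigned int objset_bytes,
                                 unsigned int alloc_size, bool trim_enabled)
{
        struct queue_limits lim = {
                .max_hw_sectors = objset_bytes >> SECTOR_SHIFT,
                .io_min         = alloc_size,
                .io_opt         = alloc_size,
        };

        if (trim_enabled) {     /* adjust the stack copy, nothing to unwind */
                lim.discard_granularity = alloc_size;
                lim.max_hw_discard_sectors = objset_bytes >> SECTOR_SHIFT;
        }

        return blk_mq_alloc_disk(set, &lim, data);
}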
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6b4f1898a722..26ff5cd2bf0a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4952,6 +4952,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) struct request_queue *q; unsigned int objset_bytes = rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; + struct queue_limits lim = { + .max_hw_sectors = objset_bytes >> SECTOR_SHIFT, + .max_user_sectors = objset_bytes >> SECTOR_SHIFT, + .io_min = rbd_dev->opts->alloc_size, + .io_opt = rbd_dev->opts->alloc_size, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; int err; memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); @@ -4966,7 +4974,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (err) return err; - disk = blk_mq_alloc_disk(&rbd_dev->tag_set, NULL, rbd_dev); + if (rbd_dev->opts->trim) { + lim.discard_granularity = rbd_dev->opts->alloc_size; + lim.max_hw_discard_sectors = objset_bytes >> SECTOR_SHIFT; + lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT; + } + + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; @@ -4987,19 +5001,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); - q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, USHRT_MAX); - blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, rbd_dev->opts->alloc_size); - blk_queue_io_opt(q, rbd_dev->opts->alloc_size); - - if (rbd_dev->opts->trim) { - q->limits.discard_granularity = rbd_dev->opts->alloc_size; - blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); - blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); - } - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); -- cgit From e6ed9892f10d7195d621ede1cedc41421f1ca607 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:51 +0100 Subject: rnbd-clt: pass queue_limits to blk_mq_alloc_disk Pass the limits rnbd-clt imposes directly to blk_mq_alloc_disk instead of setting them one at a time. While at it don't set an explicit number of discard segments, as 1 is the default (which most drivers rely on). 
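Since the server reply arrives in little-endian wire format, each field is converted exactly once while filling the limits; a self-contained sketch with a made-up reply structure, not rnbd's actual message layout:

#include <linux/blkdev.h>
#include <linux/types.h>
#include <asm/byteorder.h>

struct foo_open_rsp {                   /* hypothetical wire format */
        __le16 logical_block_size;
        __le32 max_discard_sectors;
        __le16 secure_discard;
};

static void foo_rsp_to_limits(const struct foo_open_rsp *rsp,
                              struct queue_limits *lim)
{
        lim->logical_block_size = le16_to_cpu(rsp->logical_block_size);
        lim->max_hw_discard_sectors = le32_to_cpu(rsp->max_discard_sectors);
        if (rsp->secure_discard)        /* nonzero test needs no byte swap */
                lim->max_secure_erase_sectors =
                        le32_to_cpu(rsp->max_discard_sectors);
}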
Signed-off-by: Christoph Hellwig Acked-by: Jack Wang Link: https://lore.kernel.org/r/20240215070300.2200308-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 64 +++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 39 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index d51be4f2df61..b7ffe03c6160 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1329,43 +1329,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev) } } -static void setup_request_queue(struct rnbd_clt_dev *dev, - struct rnbd_msg_open_rsp *rsp) -{ - blk_queue_logical_block_size(dev->queue, - le16_to_cpu(rsp->logical_block_size)); - blk_queue_physical_block_size(dev->queue, - le16_to_cpu(rsp->physical_block_size)); - blk_queue_max_hw_sectors(dev->queue, - dev->sess->max_io_size / SECTOR_SIZE); - - /* - * we don't support discards to "discontiguous" segments - * in on request - */ - blk_queue_max_discard_segments(dev->queue, 1); - - blk_queue_max_discard_sectors(dev->queue, - le32_to_cpu(rsp->max_discard_sectors)); - dev->queue->limits.discard_granularity = - le32_to_cpu(rsp->discard_granularity); - dev->queue->limits.discard_alignment = - le32_to_cpu(rsp->discard_alignment); - if (le16_to_cpu(rsp->secure_discard)) - blk_queue_max_secure_erase_sectors(dev->queue, - le32_to_cpu(rsp->max_discard_sectors)); - blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); - blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); - blk_queue_max_segments(dev->queue, dev->sess->max_segments); - blk_queue_io_opt(dev->queue, dev->sess->max_io_size); - blk_queue_virt_boundary(dev->queue, SZ_4K - 1); - blk_queue_write_cache(dev->queue, - !!(rsp->cache_policy & RNBD_WRITEBACK), - !!(rsp->cache_policy & RNBD_FUA)); - blk_queue_max_write_zeroes_sectors(dev->queue, - le32_to_cpu(rsp->max_write_zeroes_sectors)); -} - static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, struct rnbd_msg_open_rsp *rsp, int idx) { @@ -1403,18 +1366,41 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, static int rnbd_client_setup_device(struct rnbd_clt_dev *dev, struct rnbd_msg_open_rsp *rsp) { + struct queue_limits lim = { + .logical_block_size = le16_to_cpu(rsp->logical_block_size), + .physical_block_size = le16_to_cpu(rsp->physical_block_size), + .io_opt = dev->sess->max_io_size, + .max_hw_sectors = dev->sess->max_io_size / SECTOR_SIZE, + .max_hw_discard_sectors = le32_to_cpu(rsp->max_discard_sectors), + .discard_granularity = le32_to_cpu(rsp->discard_granularity), + .discard_alignment = le32_to_cpu(rsp->discard_alignment), + .max_segments = dev->sess->max_segments, + .virt_boundary_mask = SZ_4K - 1, + .max_write_zeroes_sectors = + le32_to_cpu(rsp->max_write_zeroes_sectors), + }; int idx = dev->clt_device_id; dev->size = le64_to_cpu(rsp->nsectors) * le16_to_cpu(rsp->logical_block_size); - dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, NULL, dev); + if (rsp->secure_discard) { + lim.max_secure_erase_sectors = + le32_to_cpu(rsp->max_discard_sectors); + } + + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev); if (IS_ERR(dev->gd)) return PTR_ERR(dev->gd); dev->queue = dev->gd->queue; rnbd_init_mq_hw_queues(dev); - setup_request_queue(dev, rsp); + blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); + blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); + blk_queue_write_cache(dev->queue, + !!(rsp->cache_policy & RNBD_WRITEBACK), + !!(rsp->cache_policy & RNBD_FUA)); + return 
rnbd_clt_setup_gen_disk(dev, rsp, idx); } -- cgit From d0fa9a8b0af71b69cf3dec10feaebe19d55a72cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:52 +0100 Subject: sunvdc: pass queue_limits to blk_mq_alloc_disk Pass the few limits sunvdc imposes directly to blk_mq_alloc_disk instead of setting them one at a time. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/sunvdc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index a1f74dd1eae5..c99dd6698977 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -784,6 +784,14 @@ static const struct blk_mq_ops vdc_mq_ops = { static int probe_disk(struct vdc_port *port) { + struct queue_limits lim = { + .physical_block_size = port->vdisk_phys_blksz, + .max_hw_sectors = port->max_xfer_size, + /* Each segment in a request is up to an aligned page in size. */ + .seg_boundary_mask = PAGE_SIZE - 1, + .max_segment_size = PAGE_SIZE, + .max_segments = port->ring_cookies, + }; struct request_queue *q; struct gendisk *g; int err; @@ -824,7 +832,7 @@ static int probe_disk(struct vdc_port *port) if (err) return err; - g = blk_mq_alloc_disk(&port->tag_set, NULL, port); + g = blk_mq_alloc_disk(&port->tag_set, &lim, port); if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); @@ -835,12 +843,6 @@ static int probe_disk(struct vdc_port *port) port->disk = g; q = g->queue; - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - blk_queue_max_segment_size(q, PAGE_SIZE); - - blk_queue_max_segments(q, port->ring_cookies); - blk_queue_max_hw_sectors(q, port->max_xfer_size); g->major = vdc_major; g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT; g->minors = 1 << PARTITION_SHIFT; @@ -872,8 +874,6 @@ static int probe_disk(struct vdc_port *port) } } - blk_queue_physical_block_size(q, port->vdisk_phys_blksz); - pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n", g->disk_name, port->vdisk_size, (port->vdisk_size >> (20 - 9)), -- cgit From 494ea040bcb5f4cc78c37dc53c7915752c24f739 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Feb 2024 08:02:59 +0100 Subject: ublk: pass queue_limits to blk_mq_alloc_disk Pass the limits ublk imposes directly to blk_mq_alloc_disk instead of setting them one at a time. 
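A consequence of building the limits before the allocation is that parameter validation can fail early, with nothing yet to unwind; compile-time support checks fit the same spot. Sketched with invented foo_* types (the shape of the change, not ublk's exact code):

#include <linux/blk-mq.h>

struct foo_params { bool zoned; u32 max_open_zones; };

static int foo_params_to_limits(const struct foo_params *p,
                                struct queue_limits *lim)
{
        if (p->zoned) {
                if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
                        return -EOPNOTSUPP;     /* refuse before any allocation */
                lim->zoned = true;
                lim->max_open_zones = p->max_open_zones;
        }
        return 0;
}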
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240215070300.2200308-17-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 90 ++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 49 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c5b655270798..01afe90a47ac 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -246,21 +246,12 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) return 0; } -static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - const struct ublk_param_zoned *p = &ub->params.zoned; - - disk_set_zoned(ub->ub_disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); blk_queue_required_elevator_features(ub->ub_disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE); - disk_set_max_active_zones(ub->ub_disk, p->max_active_zones); - disk_set_max_open_zones(ub->ub_disk, p->max_open_zones); - blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors); - ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); - - return 0; } /* Based on virtblk_alloc_report_buffer */ @@ -432,9 +423,8 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) return -EOPNOTSUPP; } -static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - return -EOPNOTSUPP; } static int ublk_revalidate_disk_zones(struct ublk_device *ub) @@ -498,11 +488,6 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) struct request_queue *q = ub->ub_disk->queue; const struct ublk_param_basic *p = &ub->params.basic; - blk_queue_logical_block_size(q, 1 << p->logical_bs_shift); - blk_queue_physical_block_size(q, 1 << p->physical_bs_shift); - blk_queue_io_min(q, 1 << p->io_min_shift); - blk_queue_io_opt(q, 1 << p->io_opt_shift); - blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE, p->attrs & UBLK_ATTR_FUA); if (p->attrs & UBLK_ATTR_ROTATIONAL) @@ -510,29 +495,12 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - blk_queue_max_hw_sectors(q, p->max_sectors); - blk_queue_chunk_sectors(q, p->chunk_sectors); - blk_queue_virt_boundary(q, p->virt_boundary_mask); - if (p->attrs & UBLK_ATTR_READ_ONLY) set_disk_ro(ub->ub_disk, true); set_capacity(ub->ub_disk, p->dev_sectors); } -static void ublk_dev_param_discard_apply(struct ublk_device *ub) -{ - struct request_queue *q = ub->ub_disk->queue; - const struct ublk_param_discard *p = &ub->params.discard; - - q->limits.discard_alignment = p->discard_alignment; - q->limits.discard_granularity = p->discard_granularity; - blk_queue_max_discard_sectors(q, p->max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, - p->max_write_zeroes_sectors); - blk_queue_max_discard_segments(q, p->max_discard_segments); -} - static int ublk_validate_params(const struct ublk_device *ub) { /* basic param is the only one which must be set */ @@ -576,20 +544,12 @@ static int ublk_validate_params(const struct ublk_device *ub) return 0; } -static int ublk_apply_params(struct ublk_device *ub) +static void ublk_apply_params(struct ublk_device *ub) { - if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC)) - return -EINVAL; - ublk_dev_param_basic_apply(ub); - if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) - ublk_dev_param_discard_apply(ub); - if (ub->params.types & UBLK_PARAM_TYPE_ZONED) - return 
ublk_dev_param_zoned_apply(ub); - - return 0; + ublk_dev_param_zoned_apply(ub); } static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) @@ -2205,12 +2165,47 @@ static struct ublk_device *ublk_get_device_from_id(int idx) static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) { const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); + const struct ublk_param_basic *p = &ub->params.basic; int ublksrv_pid = (int)header->data[0]; + struct queue_limits lim = { + .logical_block_size = 1 << p->logical_bs_shift, + .physical_block_size = 1 << p->physical_bs_shift, + .io_min = 1 << p->io_min_shift, + .io_opt = 1 << p->io_opt_shift, + .max_hw_sectors = p->max_sectors, + .chunk_sectors = p->chunk_sectors, + .virt_boundary_mask = p->virt_boundary_mask, + + }; struct gendisk *disk; int ret = -EINVAL; if (ublksrv_pid <= 0) return -EINVAL; + if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC)) + return -EINVAL; + + if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) { + const struct ublk_param_discard *pd = &ub->params.discard; + + lim.discard_alignment = pd->discard_alignment; + lim.discard_granularity = pd->discard_granularity; + lim.max_hw_discard_sectors = pd->max_discard_sectors; + lim.max_write_zeroes_sectors = pd->max_write_zeroes_sectors; + lim.max_discard_segments = pd->max_discard_segments; + } + + if (ub->params.types & UBLK_PARAM_TYPE_ZONED) { + const struct ublk_param_zoned *p = &ub->params.zoned; + + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return -EOPNOTSUPP; + + lim.zoned = true; + lim.max_active_zones = p->max_active_zones; + lim.max_open_zones = p->max_open_zones; + lim.max_zone_append_sectors = p->max_zone_append_sectors; + } if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; @@ -2222,7 +2217,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&ub->tag_set, &lim, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; @@ -2234,9 +2229,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) ub->dev_info.ublksrv_pid = ublksrv_pid; ub->ub_disk = disk; - ret = ublk_apply_params(ub); - if (ret) - goto out_put_disk; + ublk_apply_params(ub); /* don't probe partitions if any one ubq daemon is un-trusted */ if (ub->nr_privileged_daemon != ub->nr_queues_ready) @@ -2262,7 +2255,6 @@ out_put_cdev: ub->dev_info.state = UBLK_S_DEV_DEAD; ublk_put_device(ub); } -out_put_disk: if (ret) put_disk(disk); out_unlock: -- cgit From 8b631f9cf0b84ac59cd4f0c6dcd2d0cb80dd8a49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:44 +0100 Subject: null_blk: remove the bio based I/O path The bio based I/O path complicates null_blk and also makes various data structures, including the per-command one, way bigger than required for the main request based interface. As the bio-based path is mostly used by stacking drivers and simple memory based drivers, and brd is a good example driver for the latter, there is no need to have a bio based path in null_blk. Remove the path to simplify the driver and make future block layer API changes simpler by not having to deal with the complex two-API setup in null_blk. Note that the queue_mode field in struct nullb_device is kept, as that is simpler than having two different places to check the value and fully open coding the debugfs helpers, as the existing ones won't work without a named struct member. 
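The key enabler for dropping the cached request/bio pointers, visible throughout the diff below, is that blk-mq can translate between a request and its per-command data in both directions; a minimal sketch, with foo_cmd standing in for the driver's pdu:

#include <linux/blk-mq.h>

struct foo_cmd {
        blk_status_t error;
};

static void foo_complete_rq(struct request *rq)
{
        struct foo_cmd *cmd = blk_mq_rq_to_pdu(rq);     /* request -> pdu */

        blk_mq_end_request(rq, cmd->error);
}

static void foo_timer_done(struct foo_cmd *cmd)
{
        /* pdu -> request, so the pdu no longer needs a struct request pointer */
        blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
}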
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 365 +++++++------------------------------- drivers/block/null_blk/null_blk.h | 17 -- drivers/block/null_blk/trace.h | 5 +- drivers/block/null_blk/zoned.c | 10 +- 4 files changed, 69 insertions(+), 328 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index baf2b228d008..d6836327eefb 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=,,,"); #endif +/* + * Historic queue modes. + * + * These days nothing but NULL_Q_MQ is actually supported, but we keep it the + * enum for error reporting. + */ +enum { + NULL_Q_BIO = 0, + NULL_Q_RQ = 1, + NULL_Q_MQ = 2, +}; + static int g_queue_mode = NULL_Q_MQ; static int null_param_store_val(const char *str, int *val, int min, int max) @@ -756,98 +768,11 @@ static void null_free_dev(struct nullb_device *dev) kfree(dev); } -static void put_tag(struct nullb_queue *nq, unsigned int tag) -{ - clear_bit_unlock(tag, nq->tag_map); - - if (waitqueue_active(&nq->wait)) - wake_up(&nq->wait); -} - -static unsigned int get_tag(struct nullb_queue *nq) -{ - unsigned int tag; - - do { - tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); - if (tag >= nq->queue_depth) - return -1U; - } while (test_and_set_bit_lock(tag, nq->tag_map)); - - return tag; -} - -static void free_cmd(struct nullb_cmd *cmd) -{ - put_tag(cmd->nq, cmd->tag); -} - -static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); - -static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - unsigned int tag; - - tag = get_tag(nq); - if (tag != -1U) { - cmd = &nq->cmds[tag]; - cmd->tag = tag; - cmd->error = BLK_STS_OK; - cmd->nq = nq; - if (nq->dev->irqmode == NULL_IRQ_TIMER) { - hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - cmd->timer.function = null_cmd_timer_expired; - } - return cmd; - } - - return NULL; -} - -static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio) -{ - struct nullb_cmd *cmd; - DEFINE_WAIT(wait); - - do { - /* - * This avoids multiple return statements, multiple calls to - * __alloc_cmd() and a fast path call to prepare_to_wait(). 
- */ - cmd = __alloc_cmd(nq); - if (cmd) { - cmd->bio = bio; - return cmd; - } - prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); - io_schedule(); - finish_wait(&nq->wait, &wait); - } while (1); -} - -static void end_cmd(struct nullb_cmd *cmd) -{ - int queue_mode = cmd->nq->dev->queue_mode; - - switch (queue_mode) { - case NULL_Q_MQ: - blk_mq_end_request(cmd->rq, cmd->error); - return; - case NULL_Q_BIO: - cmd->bio->bi_status = cmd->error; - bio_endio(cmd->bio); - break; - } - - free_cmd(cmd); -} - static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) { - end_cmd(container_of(timer, struct nullb_cmd, timer)); + struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer); + blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error); return HRTIMER_NORESTART; } @@ -860,7 +785,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) static void null_complete_rq(struct request *rq) { - end_cmd(blk_mq_rq_to_pdu(rq)); + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + + blk_mq_end_request(rq, cmd->error); } static struct nullb_page *null_alloc_page(void) @@ -1277,7 +1204,7 @@ static int null_transfer(struct nullb *nullb, struct page *page, static int null_handle_rq(struct nullb_cmd *cmd) { - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; @@ -1302,63 +1229,21 @@ static int null_handle_rq(struct nullb_cmd *cmd) return 0; } -static int null_handle_bio(struct nullb_cmd *cmd) -{ - struct bio *bio = cmd->bio; - struct nullb *nullb = cmd->nq->dev->nullb; - int err; - unsigned int len; - sector_t sector = bio->bi_iter.bi_sector; - struct bio_vec bvec; - struct bvec_iter iter; - - spin_lock_irq(&nullb->lock); - bio_for_each_segment(bvec, bio, iter) { - len = bvec.bv_len; - err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector, - bio->bi_opf & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - return err; - } - sector += len >> SECTOR_SHIFT; - } - spin_unlock_irq(&nullb->lock); - return 0; -} - -static void null_stop_queue(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_stop_hw_queues(q); -} - -static void null_restart_queue_async(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_start_stopped_hw_queues(q, true); -} - static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; blk_status_t sts = BLK_STS_OK; - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); if (!hrtimer_active(&nullb->bw_timer)) hrtimer_restart(&nullb->bw_timer); if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) { - null_stop_queue(nullb); + blk_mq_stop_hw_queues(nullb->q); /* race with timer */ if (atomic_long_read(&nullb->cur_bytes) > 0) - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); /* requeue request */ sts = BLK_STS_DEV_RESOURCE; } @@ -1385,37 +1270,29 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; - int err; if (op == REQ_OP_DISCARD) return null_handle_discard(dev, sector, nr_sectors); + return errno_to_blk_status(null_handle_rq(cmd)); - if (dev->queue_mode == NULL_Q_BIO) - err = null_handle_bio(cmd); - else - err = null_handle_rq(cmd); - - return 
errno_to_blk_status(err); } static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb_device *dev = cmd->nq->dev; struct bio *bio; - if (dev->memory_backed) - return; - - if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) { - zero_fill_bio(cmd->bio); - } else if (req_op(cmd->rq) == REQ_OP_READ) { - __rq_for_each_bio(bio, cmd->rq) + if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) { + __rq_for_each_bio(bio, rq) zero_fill_bio(bio); } } static inline void nullb_complete_cmd(struct nullb_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); + /* * Since root privileges are required to configure the null_blk * driver, it is fine that this driver does not initialize the @@ -1429,20 +1306,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) /* Complete IO by inline, softirq or timer */ switch (cmd->nq->dev->irqmode) { case NULL_IRQ_SOFTIRQ: - switch (cmd->nq->dev->queue_mode) { - case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); - break; - case NULL_Q_BIO: - /* - * XXX: no proper submitting cpu information available. - */ - end_cmd(cmd); - break; - } + blk_mq_complete_request(rq); break; case NULL_IRQ_NONE: - end_cmd(cmd); + blk_mq_end_request(rq, cmd->error); break; case NULL_IRQ_TIMER: null_cmd_end_timer(cmd); @@ -1503,7 +1370,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); hrtimer_forward_now(&nullb->bw_timer, timer_interval); @@ -1520,26 +1387,6 @@ static void nullb_setup_bwtimer(struct nullb *nullb) hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); } -static struct nullb_queue *nullb_to_queue(struct nullb *nullb) -{ - int index = 0; - - if (nullb->nr_queues != 1) - index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); - - return &nullb->queues[index]; -} - -static void null_submit_bio(struct bio *bio) -{ - sector_t sector = bio->bi_iter.bi_sector; - sector_t nr_sectors = bio_sectors(bio); - struct nullb *nullb = bio->bi_bdev->bd_disk->private_data; - struct nullb_queue *nq = nullb_to_queue(nullb); - - null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); -} - #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static bool should_timeout_request(struct request *rq) @@ -1659,7 +1506,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) blk_rq_sectors(req)); if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error, blk_mq_end_request_batch)) - end_cmd(cmd); + blk_mq_end_request(req, cmd->error); nr++; } @@ -1715,7 +1562,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; } - cmd->rq = rq; cmd->error = BLK_STS_OK; cmd->nq = nq; cmd->fake_timeout = should_timeout_request(rq) || @@ -1774,22 +1620,6 @@ static void null_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } -static void cleanup_queue(struct nullb_queue *nq) -{ - bitmap_free(nq->tag_map); - kfree(nq->cmds); -} - -static void cleanup_queues(struct nullb *nullb) -{ - int i; - - for (i = 0; i < nullb->nr_queues; i++) - cleanup_queue(&nullb->queues[i]); - - kfree(nullb->queues); -} - static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { struct nullb_queue *nq = hctx->driver_data; @@ -1800,8 
+1630,6 @@ static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) { - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; nq->dev = nullb->dev; INIT_LIST_HEAD(&nq->poll_list); spin_lock_init(&nq->poll_lock); @@ -1853,14 +1681,13 @@ static void null_del_dev(struct nullb *nullb) if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { hrtimer_cancel(&nullb->bw_timer); atomic_long_set(&nullb->cur_bytes, LONG_MAX); - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); } put_disk(nullb->disk); - if (dev->queue_mode == NULL_Q_MQ && - nullb->tag_set == &nullb->__tag_set) + if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); - cleanup_queues(nullb); + kfree(nullb->queues); if (null_cache_active(nullb)) null_free_device_storage(nullb->dev, true); kfree(nullb); @@ -1887,40 +1714,11 @@ static void null_config_discard(struct nullb *nullb) blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); } -static const struct block_device_operations null_bio_ops = { +static const struct block_device_operations null_ops = { .owner = THIS_MODULE, - .submit_bio = null_submit_bio, .report_zones = null_report_zones, }; -static const struct block_device_operations null_rq_ops = { - .owner = THIS_MODULE, - .report_zones = null_report_zones, -}; - -static int setup_commands(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - int i; - - nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); - if (!nq->cmds) - return -ENOMEM; - - nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL); - if (!nq->tag_map) { - kfree(nq->cmds); - return -ENOMEM; - } - - for (i = 0; i < nq->queue_depth; i++) { - cmd = &nq->cmds[i]; - cmd->tag = -1U; - } - - return 0; -} - static int setup_queues(struct nullb *nullb) { int nqueues = nr_cpu_ids; @@ -1937,24 +1735,6 @@ static int setup_queues(struct nullb *nullb) return 0; } -static int init_driver_queues(struct nullb *nullb) -{ - struct nullb_queue *nq; - int i, ret = 0; - - for (i = 0; i < nullb->dev->submit_queues; i++) { - nq = &nullb->queues[i]; - - null_init_queue(nullb, nq); - - ret = setup_commands(nq); - if (ret) - return ret; - nullb->nr_queues++; - } - return 0; -} - static int null_gendisk_register(struct nullb *nullb) { sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; @@ -1965,10 +1745,7 @@ static int null_gendisk_register(struct nullb *nullb) disk->major = null_major; disk->first_minor = nullb->index; disk->minors = 1; - if (queue_is_mq(nullb->q)) - disk->fops = &null_rq_ops; - else - disk->fops = &null_bio_ops; + disk->fops = &null_ops; disk->private_data = nullb; strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); @@ -2036,11 +1813,15 @@ static int null_validate_conf(struct nullb_device *dev) pr_err("legacy IO path is no longer available\n"); return -EINVAL; } + if (dev->queue_mode == NULL_Q_BIO) { + pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n"); + dev->queue_mode = NULL_Q_MQ; + } dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); - if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { + if (dev->use_per_node_hctx) { if (dev->submit_queues != nr_online_nodes) dev->submit_queues = nr_online_nodes; } else if (dev->submit_queues > nr_cpu_ids) @@ -2052,8 +1833,6 @@ static int null_validate_conf(struct nullb_device *dev) if (dev->poll_queues > g_poll_queues) 
dev->poll_queues = g_poll_queues; dev->prev_poll_queues = dev->poll_queues; - - dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); /* Do memory allocation, so set blocking */ @@ -2064,9 +1843,6 @@ static int null_validate_conf(struct nullb_device *dev) dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, dev->cache_size); dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); - /* can not stop a queue */ - if (dev->queue_mode == NULL_Q_BIO) - dev->mbps = 0; if (dev->zoned && (!dev->zone_size || !is_power_of_2(dev->zone_size))) { @@ -2127,44 +1903,31 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_free_nullb; - if (dev->queue_mode == NULL_Q_MQ) { - if (dev->shared_tags) { - if (!tag_set.ops) { - rv = null_init_tag_set(NULL, &tag_set); - if (rv) { - tag_set.ops = NULL; - goto out_cleanup_queues; - } + if (dev->shared_tags) { + if (!tag_set.ops) { + rv = null_init_tag_set(NULL, &tag_set); + if (rv) { + tag_set.ops = NULL; + goto out_cleanup_queues; } - nullb->tag_set = &tag_set; - rv = 0; - } else { - nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb, nullb->tag_set); } + nullb->tag_set = &tag_set; + rv = 0; + } else { + nullb->tag_set = &nullb->__tag_set; + rv = null_init_tag_set(nullb, nullb->tag_set); + } - if (rv) - goto out_cleanup_queues; - - nullb->tag_set->timeout = 5 * HZ; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); - if (IS_ERR(nullb->disk)) { - rv = PTR_ERR(nullb->disk); - goto out_cleanup_tags; - } - nullb->q = nullb->disk->queue; - } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->disk = blk_alloc_disk(NULL, nullb->dev->home_node); - if (IS_ERR(nullb->disk)) { - rv = PTR_ERR(nullb->disk); - goto out_cleanup_queues; - } + if (rv) + goto out_cleanup_queues; - nullb->q = nullb->disk->queue; - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_disk; + nullb->tag_set->timeout = 5 * HZ; + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); + if (IS_ERR(nullb->disk)) { + rv = PTR_ERR(nullb->disk); + goto out_cleanup_tags; } + nullb->q = nullb->disk->queue; if (dev->mbps) { set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); @@ -2232,10 +1995,10 @@ out_cleanup_zone: out_cleanup_disk: put_disk(nullb->disk); out_cleanup_tags: - if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) + if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); out_cleanup_queues: - cleanup_queues(nullb); + kfree(nullb->queues); out_free_nullb: kfree(nullb); dev->nullb = NULL; @@ -2311,7 +2074,7 @@ static int __init null_init(void) return -EINVAL; } - if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { + if (g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("submit_queues param is set to %u.\n", nr_online_nodes); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 7bcfc0922ae8..7c618d53d8fd 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -16,11 +16,6 @@ #include struct nullb_cmd { - union { - struct request *rq; - struct bio *bio; - }; - unsigned int tag; blk_status_t error; bool fake_timeout; struct nullb_queue *nq; @@ -28,16 +23,11 @@ struct nullb_cmd { }; struct nullb_queue { - unsigned long *tag_map; - wait_queue_head_t wait; - unsigned int queue_depth; struct nullb_device *dev; unsigned int requeue_selection; struct list_head poll_list; spinlock_t poll_lock; - - struct nullb_cmd *cmds; }; struct 
nullb_zone { @@ -60,13 +50,6 @@ struct nullb_zone { unsigned int capacity; }; -/* Queue modes */ -enum { - NULL_Q_BIO = 0, - NULL_Q_RQ = 1, - NULL_Q_MQ = 2, -}; - struct nullb_device { struct nullb *nullb; struct config_group group; diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h index 6b2b370e786f..ef2d05d5f0df 100644 --- a/drivers/block/null_blk/trace.h +++ b/drivers/block/null_blk/trace.h @@ -41,10 +41,11 @@ TRACE_EVENT(nullb_zone_op, __field(unsigned int, zone_cond) ), TP_fast_assign( - __entry->op = req_op(cmd->rq); + __entry->op = req_op(blk_mq_rq_from_pdu(cmd)); __entry->zone_no = zone_no; __entry->zone_cond = zone_cond; - __assign_disk_name(__entry->disk, cmd->rq->q->disk); + __assign_disk_name(__entry->disk, + blk_mq_rq_from_pdu(cmd)->q->disk); ), TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", __print_disk_name(__entry->disk), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 6f5e0994862e..3605afe105da 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -168,10 +168,7 @@ int null_register_zoned_dev(struct nullb *nullb) disk_set_max_open_zones(nullb->disk, dev->zone_max_open); disk_set_max_active_zones(nullb->disk, dev->zone_max_active); - if (queue_is_mq(q)) - return blk_revalidate_disk_zones(nullb->disk, NULL); - - return 0; + return blk_revalidate_disk_zones(nullb->disk, NULL); } void null_free_zoned_dev(struct nullb_device *dev) @@ -394,10 +391,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, */ if (append) { sector = zone->wp; - if (dev->queue_mode == NULL_Q_MQ) - cmd->rq->__sector = sector; - else - cmd->bio->bi_iter.bi_sector = sector; + blk_mq_rq_from_pdu(cmd)->__sector = sector; } else if (sector != zone->wp) { ret = BLK_STS_IOERR; goto unlock; -- cgit From e32b0855367b65095823b4427aad3da7c6a771a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:45 +0100 Subject: null_blk: initialize the tag_set timeout in null_init_tag_set Otherwise it will be reset to the same value every time a device using the shared tag_set is initialized. 
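The fix works because the timeout lives in the tag_set, which the shared-tags case allocates only once; assigning it inside the init helper means the shared and per-device paths both inherit it. A sketch with hypothetical names and arbitrary sizing values:

#include <linux/blk-mq.h>

static int foo_init_tag_set(struct blk_mq_tag_set *set,
                            const struct blk_mq_ops *ops)
{
        set->ops = ops;
        set->nr_hw_queues = 1;
        set->queue_depth = 64;
        set->numa_node = NUMA_NO_NODE;
        set->timeout = 5 * HZ;  /* set once, before blk_mq_alloc_tag_set() */
        return blk_mq_alloc_tag_set(set);
}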
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index d6836327eefb..89e63d1c6103 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1797,6 +1797,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) set->nr_hw_queues = hw_queues; set->queue_depth = queue_depth; set->numa_node = numa_node; + set->timeout = 5 * HZ; if (poll_queues) { set->nr_hw_queues += poll_queues; set->nr_maps = 3; @@ -1921,7 +1922,6 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - nullb->tag_set->timeout = 5 * HZ; nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); -- cgit From 72ca28765fc461c1aeb87372359ec0cfd609448b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:46 +0100 Subject: null_blk: refactor tag_set setup Move the tagset initialization out of null_add_dev into a new null_setup_tagset helper, and move the shared vs local differences out of null_init_tag_set into the callers. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 106 ++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 55 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 89e63d1c6103..03c3917a56fa 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1759,55 +1759,65 @@ static int null_gendisk_register(struct nullb *nullb) return add_disk(disk); } -static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) +static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues) { - unsigned int flags = BLK_MQ_F_SHOULD_MERGE; - int hw_queues, numa_node; - unsigned int queue_depth; - int poll_queues; - - if (nullb) { - hw_queues = nullb->dev->submit_queues; - poll_queues = nullb->dev->poll_queues; - queue_depth = nullb->dev->hw_queue_depth; - numa_node = nullb->dev->home_node; - if (nullb->dev->no_sched) - flags |= BLK_MQ_F_NO_SCHED; - if (nullb->dev->shared_tag_bitmap) - flags |= BLK_MQ_F_TAG_HCTX_SHARED; - if (nullb->dev->blocking) - flags |= BLK_MQ_F_BLOCKING; - } else { - hw_queues = g_submit_queues; - poll_queues = g_poll_queues; - queue_depth = g_hw_queue_depth; - numa_node = g_home_node; - if (g_no_sched) - flags |= BLK_MQ_F_NO_SCHED; - if (g_shared_tag_bitmap) - flags |= BLK_MQ_F_TAG_HCTX_SHARED; - if (g_blocking) - flags |= BLK_MQ_F_BLOCKING; - } - set->ops = &null_mq_ops; - set->cmd_size = sizeof(struct nullb_cmd); - set->flags = flags; - set->driver_data = nullb; - set->nr_hw_queues = hw_queues; - set->queue_depth = queue_depth; - set->numa_node = numa_node; + set->cmd_size = sizeof(struct nullb_cmd); set->timeout = 5 * HZ; + set->nr_maps = 1; if (poll_queues) { set->nr_hw_queues += poll_queues; - set->nr_maps = 3; - } else { - set->nr_maps = 1; + set->nr_maps += 2; } - return blk_mq_alloc_tag_set(set); } 
+static int null_init_global_tag_set(void) +{ + int error; + + if (tag_set.ops) + return 0; + + tag_set.nr_hw_queues = g_submit_queues; + tag_set.queue_depth = g_hw_queue_depth; + tag_set.numa_node = g_home_node; + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + if (g_no_sched) + tag_set.flags |= BLK_MQ_F_NO_SCHED; + if (g_shared_tag_bitmap) + tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED; + if (g_blocking) + tag_set.flags |= BLK_MQ_F_BLOCKING; + + error = null_init_tag_set(&tag_set, g_poll_queues); + if (error) + tag_set.ops = NULL; + return error; +} + +static int null_setup_tagset(struct nullb *nullb) +{ + if (nullb->dev->shared_tags) { + nullb->tag_set = &tag_set; + return null_init_global_tag_set(); + } + + nullb->tag_set = &nullb->__tag_set; + nullb->tag_set->driver_data = nullb; + nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues; + nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth; + nullb->tag_set->numa_node = nullb->dev->home_node; + nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE; + if (nullb->dev->no_sched) + nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED; + if (nullb->dev->shared_tag_bitmap) + nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; + if (nullb->dev->blocking) + nullb->tag_set->flags |= BLK_MQ_F_BLOCKING; + return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues); +} + static int null_validate_conf(struct nullb_device *dev) { if (dev->queue_mode == NULL_Q_RQ) { @@ -1904,21 +1914,7 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_free_nullb; - if (dev->shared_tags) { - if (!tag_set.ops) { - rv = null_init_tag_set(NULL, &tag_set); - if (rv) { - tag_set.ops = NULL; - goto out_cleanup_queues; - } - } - nullb->tag_set = &tag_set; - rv = 0; - } else { - nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb, nullb->tag_set); - } - + rv = null_setup_tagset(nullb); if (rv) goto out_cleanup_queues; -- cgit From 0a39e550c18244cdb9c4e671266a2a1d682d15c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:47 +0100 Subject: null_blk: remove null_gendisk_register null_gendisk_register isn't a very useful abstraction given that it doesn't even allocate the gendisk. Merge it into the only caller instead. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 03c3917a56fa..0c8d50423213 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1735,30 +1735,6 @@ static int setup_queues(struct nullb *nullb) return 0; } -static int null_gendisk_register(struct nullb *nullb) -{ - sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; - struct gendisk *disk = nullb->disk; - - set_capacity(disk, size); - - disk->major = null_major; - disk->first_minor = nullb->index; - disk->minors = 1; - disk->fops = &null_ops; - disk->private_data = nullb; - strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); - - if (nullb->dev->zoned) { - int ret = null_register_zoned_dev(nullb); - - if (ret) - return ret; - } - - return add_disk(disk); -} - static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues) { set->ops = &null_mq_ops; @@ -1972,7 +1948,22 @@ static int null_add_dev(struct nullb_device *dev) sprintf(nullb->disk_name, "nullb%d", nullb->index); } - rv = null_gendisk_register(nullb); + set_capacity(nullb->disk, + ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT); + nullb->disk->major = null_major; + nullb->disk->first_minor = nullb->index; + nullb->disk->minors = 1; + nullb->disk->fops = &null_ops; + nullb->disk->private_data = nullb; + strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + + if (nullb->dev->zoned) { + rv = null_register_zoned_dev(nullb); + if (rv) + goto out_ida_free; + } + + rv = add_disk(nullb->disk); if (rv) goto out_ida_free; -- cgit From e440626b1caf3767eda2d78610dfdc0ae7fd5238 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Feb 2024 10:32:48 +0100 Subject: null_blk: pass queue_limits to blk_mq_alloc_disk Pass the queue limits directly to blk_mq_alloc_disk instead of setting them one at a time. 
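The zoned hunks below show the companion idiom: helpers stop touching the queue and instead fill in the caller's limits structure, which is then handed to the allocation. In outline, with foo_* names as placeholders:

#include <linux/blk-mq.h>

struct foo_dev { unsigned int blocksize; sector_t zone_size_sects; bool zoned; };

static void foo_init_zoned(struct foo_dev *dev, struct queue_limits *lim)
{
        lim->zoned = true;
        lim->chunk_sectors = dev->zone_size_sects;
        lim->max_zone_append_sectors = dev->zone_size_sects;
}

static struct gendisk *foo_alloc(struct blk_mq_tag_set *set, struct foo_dev *dev)
{
        struct queue_limits lim = {
                .logical_block_size = dev->blocksize,
        };

        if (dev->zoned)
                foo_init_zoned(dev, &lim);      /* helper fills, caller allocates */
        return blk_mq_alloc_disk(set, &lim, dev);
}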
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20240220093248.3290292-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 41 +++++++++++++++++++-------------------- drivers/block/null_blk/null_blk.h | 4 ++-- drivers/block/null_blk/zoned.c | 15 +++++++------- 3 files changed, 29 insertions(+), 31 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 0c8d50423213..a0b726c8366c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1694,7 +1694,7 @@ static void null_del_dev(struct nullb *nullb) dev->nullb = NULL; } -static void null_config_discard(struct nullb *nullb) +static void null_config_discard(struct nullb *nullb, struct queue_limits *lim) { if (nullb->dev->discard == false) return; @@ -1711,7 +1711,7 @@ static void null_config_discard(struct nullb *nullb) return; } - blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); + lim->max_hw_discard_sectors = UINT_MAX >> 9; } static const struct block_device_operations null_ops = { @@ -1869,6 +1869,12 @@ static bool null_setup_fault(void) static int null_add_dev(struct nullb_device *dev) { + struct queue_limits lim = { + .logical_block_size = dev->blocksize, + .physical_block_size = dev->blocksize, + .max_hw_sectors = dev->max_sectors, + }; + struct nullb *nullb; int rv; @@ -1894,10 +1900,19 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb); + if (dev->virt_boundary) + lim.virt_boundary_mask = PAGE_SIZE - 1; + null_config_discard(nullb, &lim); + if (dev->zoned) { + rv = null_init_zoned_dev(dev, &lim); + if (rv) + goto out_cleanup_tags; + } + + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb); if (IS_ERR(nullb->disk)) { rv = PTR_ERR(nullb->disk); - goto out_cleanup_tags; + goto out_cleanup_zone; } nullb->q = nullb->disk->queue; @@ -1911,12 +1926,6 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_write_cache(nullb->q, true, true); } - if (dev->zoned) { - rv = null_init_zoned_dev(dev, nullb->q); - if (rv) - goto out_cleanup_disk; - } - nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); @@ -1924,22 +1933,12 @@ static int null_add_dev(struct nullb_device *dev) rv = ida_alloc(&nullb_indexes, GFP_KERNEL); if (rv < 0) { mutex_unlock(&lock); - goto out_cleanup_zone; + goto out_cleanup_disk; } nullb->index = rv; dev->index = rv; mutex_unlock(&lock); - blk_queue_logical_block_size(nullb->q, dev->blocksize); - blk_queue_physical_block_size(nullb->q, dev->blocksize); - if (dev->max_sectors) - blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); - - if (dev->virt_boundary) - blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1); - - null_config_discard(nullb); - if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ snprintf(nullb->disk_name, sizeof(nullb->disk_name), diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 7c618d53d8fd..25320fe34bfe 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -131,7 +131,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, unsigned int nr_sectors); #ifdef CONFIG_BLK_DEV_ZONED -int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); +int 
null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim); int null_register_zoned_dev(struct nullb *nullb); void null_free_zoned_dev(struct nullb_device *dev); int null_report_zones(struct gendisk *disk, sector_t sector, @@ -144,7 +144,7 @@ ssize_t zone_cond_store(struct nullb_device *dev, const char *page, size_t count, enum blk_zone_cond cond); #else static inline int null_init_zoned_dev(struct nullb_device *dev, - struct request_queue *q) + struct queue_limits *lim) { pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 3605afe105da..1689e2584104 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -58,7 +58,8 @@ static inline void null_unlock_zone(struct nullb_device *dev, mutex_unlock(&zone->mutex); } -int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) +int null_init_zoned_dev(struct nullb_device *dev, + struct queue_limits *lim) { sector_t dev_capacity_sects, zone_capacity_sects; struct nullb_zone *zone; @@ -151,23 +152,21 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) sector += dev->zone_size_sects; } + lim->zoned = true; + lim->chunk_sectors = dev->zone_size_sects; + lim->max_zone_append_sectors = dev->zone_size_sects; + lim->max_open_zones = dev->zone_max_open; + lim->max_active_zones = dev->zone_max_active; return 0; } int null_register_zoned_dev(struct nullb *nullb) { - struct nullb_device *dev = nullb->dev; struct request_queue *q = nullb->q; - disk_set_zoned(nullb->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); - blk_queue_chunk_sectors(q, dev->zone_size_sects); nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); - blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); - disk_set_max_open_zones(nullb->disk, dev->zone_max_open); - disk_set_max_active_zones(nullb->disk, dev->zone_max_active); - return blk_revalidate_disk_zones(nullb->disk, NULL); } -- cgit From 6f420d6a2dd8d4a795eab2839b0e08663269f9f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2024 08:36:46 +0100 Subject: pktcdvd: stop setting q->queuedata The two users can get the private data from the gendisk with one less pointer dereference, and we can drop the useless q parameter from pkt_make_request_write. 
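The shape of the change: with disk->private_data set at allocation time, a bio-based driver can recover its per-device state straight from the bio, one dereference shorter than going through queue->queuedata. A sketch with a hypothetical foo driver (foo_device and foo_queue_bio are assumptions, not pktcdvd code):

	static void foo_submit_bio(struct bio *bio)
	{
		/* was: bio->bi_bdev->bd_disk->queue->queuedata */
		struct foo_device *fd = bio->bi_bdev->bd_disk->private_data;

		foo_queue_bio(fd, bio);		/* hypothetical per-device handler */
	}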
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240222073647.3776769-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index abb82926b1c9..0cd65b27c197 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2338,9 +2338,9 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) pkt_queue_bio(pd, cloned_bio); } -static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +static void pkt_make_request_write(struct bio *bio) { - struct pktcdvd_device *pd = q->queuedata; + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data; sector_t zone; struct packet_data *pkt; int was_empty, blocked_bio; @@ -2432,7 +2432,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) static void pkt_submit_bio(struct bio *bio) { - struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata; + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data; struct device *ddev = disk_to_dev(pd->disk); struct bio *split; @@ -2476,7 +2476,7 @@ static void pkt_submit_bio(struct bio *bio) split = bio; } - pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + pkt_make_request_write(split); } while (split != bio); return; @@ -2490,7 +2490,6 @@ static void pkt_init_queue(struct pktcdvd_device *pd) blk_queue_logical_block_size(q, CD_FRAMESIZE); blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); - q->queuedata = pd; } static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) -- cgit From 4068550870360410261638479ffaf8364c366dd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2024 08:36:47 +0100 Subject: pktcdvd: set queue limits at disk allocation time Remove pkt_init_queue and just pass the two parameters directly to blk_alloc_disk. 
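For bio-based drivers the same alloc-time idea goes through blk_alloc_disk(); a compact sketch using the two limits this patch moves (constants as in the patch, surrounding error handling elided):

	struct queue_limits lim = {
		.max_hw_sectors		= PACKET_MAX_SECTORS,
		.logical_block_size	= CD_FRAMESIZE,
	};
	struct gendisk *disk = blk_alloc_disk(&lim, NUMA_NO_NODE);

	if (IS_ERR(disk))
		return PTR_ERR(disk);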
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240222073647.3776769-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 0cd65b27c197..12fcc881b04f 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2484,14 +2484,6 @@ end_io: bio_io_error(bio); } -static void pkt_init_queue(struct pktcdvd_device *pd) -{ - struct request_queue *q = pd->disk->queue; - - blk_queue_logical_block_size(q, CD_FRAMESIZE); - blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); -} - static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { struct device *ddev = disk_to_dev(pd->disk); @@ -2535,8 +2527,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) pd->bdev_handle = bdev_handle; set_blocksize(bdev_handle->bdev, CD_FRAMESIZE); - pkt_init_queue(pd); - atomic_set(&pd->cdrw.pending_bios, 0); pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name); if (IS_ERR(pd->cdrw.thread)) { @@ -2633,6 +2623,10 @@ static const struct block_device_operations pktcdvd_ops = { */ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) { + struct queue_limits lim = { + .max_hw_sectors = PACKET_MAX_SECTORS, + .logical_block_size = CD_FRAMESIZE, + }; int idx; int ret = -ENOMEM; struct pktcdvd_device *pd; @@ -2672,7 +2666,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->write_congestion_on = write_congestion_on; pd->write_congestion_off = write_congestion_off; - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_mem; -- cgit From 0f225f87873ee95dd4cf94dfc6a3249d4289e4ea Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 22 Feb 2024 08:34:20 +0000 Subject: null_blk: Delete nullb.{queue_depth, nr_queues} Since commit 8b631f9cf0b8 ("null_blk: remove the bio based I/O path"), struct nullb members queue_depth and nr_queues are only ever written, so delete them. With that, null_exit_hctx() can also be deleted. 
Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240222083420.6026-1-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 11 ----------- drivers/block/null_blk/null_blk.h | 2 -- 2 files changed, 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index a0b726c8366c..71c39bcd872c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1620,14 +1620,6 @@ static void null_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } -static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nullb_queue *nq = hctx->driver_data; - struct nullb *nullb = nq->dev->nullb; - - nullb->nr_queues--; -} - static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) { nq->dev = nullb->dev; @@ -1647,7 +1639,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, nq = &nullb->queues[hctx_idx]; hctx->driver_data = nq; null_init_queue(nullb, nq); - nullb->nr_queues++; return 0; } @@ -1660,7 +1651,6 @@ static const struct blk_mq_ops null_mq_ops = { .poll = null_poll, .map_queues = null_map_queues, .init_hctx = null_init_hctx, - .exit_hctx = null_exit_hctx, }; static void null_del_dev(struct nullb *nullb) @@ -1731,7 +1721,6 @@ static int setup_queues(struct nullb *nullb) if (!nullb->queues) return -ENOMEM; - nullb->queue_depth = nullb->dev->hw_queue_depth; return 0; } diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 25320fe34bfe..477b97746823 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -114,14 +114,12 @@ struct nullb { struct gendisk *disk; struct blk_mq_tag_set *tag_set; struct blk_mq_tag_set __tag_set; - unsigned int queue_depth; atomic_long_t cur_bytes; struct hrtimer bw_timer; unsigned long cache_flush_pos; spinlock_t lock; struct nullb_queue *queues; - unsigned int nr_queues; char disk_name[DISK_NAME_LEN]; }; -- cgit From 4a718d7dbab873bc24034fc865d3a5442632d1fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:42 +0100 Subject: xen-blkfront: set max_discard/secure erase limits to UINT_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xen-blkfront sets the max discard limit to the capacity of the device, which is suboptimal when the capacity changes. Just set it to UINT_MAX, which has the same effect and is simpler. Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4cc2884e7484..f78167cd5a63 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -944,20 +944,18 @@ static const struct blk_mq_ops blkfront_mq_ops = { static void blkif_set_queue_limits(struct blkfront_info *info) { struct request_queue *rq = info->rq; - struct gendisk *gd = info->gd; unsigned int segments = info->max_indirect_segments ?
: BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { - blk_queue_max_discard_sectors(rq, get_capacity(gd)); + blk_queue_max_discard_sectors(rq, UINT_MAX); rq->limits.discard_granularity = info->discard_granularity ?: info->physical_sector_size; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_max_secure_erase_sectors(rq, - get_capacity(gd)); + blk_queue_max_secure_erase_sectors(rq, UINT_MAX); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ -- cgit From 738be136327a56e5a67e1942a2c318fb91914a3f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:43 +0100 Subject: xen-blkfront: rely on the default discard granularity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The block layer now sets the discard granularity to the physical block size by default. Take advantage of that in xen-blkfront and only set the discard granularity if explicitly specified. Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index f78167cd5a63..1258f24b2855 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -951,8 +951,8 @@ static void blkif_set_queue_limits(struct blkfront_info *info) if (info->feature_discard) { blk_queue_max_discard_sectors(rq, UINT_MAX); - rq->limits.discard_granularity = info->discard_granularity ?: - info->physical_sector_size; + if (info->discard_granularity) + rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) blk_queue_max_secure_erase_sectors(rq, UINT_MAX); -- cgit From 4f81b87d91be2a00195f85847d040c2276cac2ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:44 +0100 Subject: xen-blkfront: don't redundantly set max_segments in blkif_recover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blkif_set_queue_limits already sets the max_segments limit, so don't do it a second time. Also remove a comment about a long-fixed bug in blk_mq_update_nr_hw_queues. Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1258f24b2855..7664638a0abb 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2008,14 +2008,10 @@ static int blkif_recover(struct blkfront_info *info) struct request *req, *n; int rc; struct bio *bio; - unsigned int segs; struct blkfront_ring_info *rinfo; blkfront_gather_backend_features(info); - /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ blkif_set_queue_limits(info); - segs = info->max_indirect_segments ?
: BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); @@ -2035,7 +2031,9 @@ static int blkif_recover(struct blkfront_info *info) list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); - BUG_ON(req->nr_phys_segments > segs); + BUG_ON(req->nr_phys_segments > + (info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST)); blk_mq_requeue_request(req, false); } blk_mq_start_stopped_hw_queues(info->rq, true); -- cgit From ba3f67c1163812b5d7ec33705c31edaa30ce6c51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2024 13:58:45 +0100 Subject: xen-blkfront: atomically update queue limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the initial queue limits to blk_mq_alloc_disk and use the blkif_set_queue_limits API to update the limits on reconnect. Signed-off-by: Christoph Hellwig Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20240221125845.3610668-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7664638a0abb..fd7c0ff2139c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -941,37 +941,35 @@ static const struct blk_mq_ops blkfront_mq_ops = { .complete = blkif_complete_rq, }; -static void blkif_set_queue_limits(struct blkfront_info *info) +static void blkif_set_queue_limits(const struct blkfront_info *info, + struct queue_limits *lim) { - struct request_queue *rq = info->rq; unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); - if (info->feature_discard) { - blk_queue_max_discard_sectors(rq, UINT_MAX); + lim->max_hw_discard_sectors = UINT_MAX; if (info->discard_granularity) - rq->limits.discard_granularity = info->discard_granularity; - rq->limits.discard_alignment = info->discard_alignment; + lim->discard_granularity = info->discard_granularity; + lim->discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_max_secure_erase_sectors(rq, UINT_MAX); + lim->max_secure_erase_sectors = UINT_MAX; } /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_logical_block_size(rq, info->sector_size); - blk_queue_physical_block_size(rq, info->physical_sector_size); - blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); + lim->logical_block_size = info->sector_size; + lim->physical_block_size = info->physical_sector_size; + lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512; /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); + lim->seg_boundary_mask = PAGE_SIZE - 1; + lim->max_segment_size = PAGE_SIZE; /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); + lim->max_segments = segments / GRANTS_PER_PSEG; /* Make sure buffer addresses are sector-aligned. 
*/ - blk_queue_dma_alignment(rq, 511); + lim->dma_alignment = 511; } static const char *flush_info(struct blkfront_info *info) @@ -1068,6 +1066,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, u16 sector_size, unsigned int physical_sector_size) { + struct queue_limits lim = {}; struct gendisk *gd; int nr_minors = 1; int err; @@ -1134,11 +1133,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (err) goto out_release_minors; - gd = blk_mq_alloc_disk(&info->tag_set, NULL, info); + blkif_set_queue_limits(info, &lim); + gd = blk_mq_alloc_disk(&info->tag_set, &lim, info); if (IS_ERR(gd)) { err = PTR_ERR(gd); goto out_free_tag_set; } + blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue); strcpy(gd->disk_name, DEV_NAME); ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); @@ -1160,7 +1161,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->gd = gd; info->sector_size = sector_size; info->physical_sector_size = physical_sector_size; - blkif_set_queue_limits(info); xlvbd_flush(info); @@ -2004,14 +2004,19 @@ static int blkfront_probe(struct xenbus_device *dev, static int blkif_recover(struct blkfront_info *info) { + struct queue_limits lim; unsigned int r_index; struct request *req, *n; int rc; struct bio *bio; struct blkfront_ring_info *rinfo; + lim = queue_limits_start_update(info->rq); blkfront_gather_backend_features(info); - blkif_set_queue_limits(info); + blkif_set_queue_limits(info, &lim); + rc = queue_limits_commit_update(info->rq, &lim); + if (rc) + return rc; for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); -- cgit From 1221b9e982e181f1c37789c46fe5bfe32d97bec4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 Feb 2024 15:55:38 +0800 Subject: ublk: improve getting & putting ublk device Firstly convert get_device() and put_device() into ublk_get_device() and ublk_put_device(). Secondly annotate ublk_get_device() & ublk_put_device() as noinline for tracing, since it is easy to trigger a device deletion hang when an incorrect order is used across ublkc mmap, ublkc close, io_uring_sqe_unregister_file, and ublkb close.
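The value of the noinline annotation here is that the helpers stay out-of-line symbols, so ftrace/kprobes can hook the exact get and put points when debugging refcount ordering. A generic sketch of the idiom (the foo_device structure is an assumption, not ublk code):

	/* Slow path only: keep out of line so it stays visible to ftrace/kprobes. */
	static noinline struct foo_device *foo_get_device(struct foo_device *fd)
	{
		if (kobject_get_unless_zero(&fd->dev.kobj))	/* fd->dev: hypothetical struct device */
			return fd;
		return NULL;
	}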
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240223075539.89945-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 01afe90a47ac..06d88d2008ba 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -605,14 +605,16 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_NEED_GET_DATA; } -static struct ublk_device *ublk_get_device(struct ublk_device *ub) +/* Called in slow path only, keep it noinline for trace purpose */ +static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub) { if (kobject_get_unless_zero(&ub->cdev_dev.kobj)) return ub; return NULL; } -static void ublk_put_device(struct ublk_device *ub) +/* Called in slow path only, keep it noinline for trace purpose */ +static noinline void ublk_put_device(struct ublk_device *ub) { put_device(&ub->cdev_dev); } @@ -671,7 +673,7 @@ static void ublk_free_disk(struct gendisk *disk) struct ublk_device *ub = disk->private_data; clear_bit(UB_STATE_USED, &ub->state); - put_device(&ub->cdev_dev); + ublk_put_device(ub); } static void ublk_store_owner_uid_gid(unsigned int *owner_uid, @@ -2142,7 +2144,7 @@ static void ublk_remove(struct ublk_device *ub) cancel_work_sync(&ub->stop_work); cancel_work_sync(&ub->quiesce_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); - put_device(&ub->cdev_dev); + ublk_put_device(ub); ublks_added--; } @@ -2235,7 +2237,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (ub->nr_privileged_daemon != ub->nr_queues_ready) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); - get_device(&ub->cdev_dev); + ublk_get_device(ub); ub->dev_info.state = UBLK_S_DEV_LIVE; if (ublk_dev_is_zoned(ub)) { -- cgit From 13fe8e6825e44129b6cbeee41d3012554bf8d687 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 Feb 2024 15:55:39 +0800 Subject: ublk: add UBLK_CMD_DEL_DEV_ASYNC The current command UBLK_CMD_DEL_DEV won't return until the device is released. This looks more reliable, but makes userspace harder to implement, especially with respect to ordering: unmap command buffer (which holds one ublkc reference), ublkc close, io_uring_file_unregister, ublkb close. Add UBLK_CMD_DEL_DEV_ASYNC so that device deletion won't wait for the release; then userspace needn't worry about the above order. Actually both loop and nbd are deleted in this async way.
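The mechanical shape of the change is a wait flag threaded into the teardown path, so the synchronous and asynchronous commands share one implementation. A hedged sketch with hypothetical names (foo_start_teardown and foo_idx_freed are assumptions):

	static DECLARE_WAIT_QUEUE_HEAD(foo_wq);

	static int foo_del_dev(struct foo_device *fd, bool wait)
	{
		int idx = fd->id;	/* capture before the struct can go away */

		foo_start_teardown(fd);	/* hypothetical: schedules the release */
		if (wait && wait_event_interruptible(foo_wq, foo_idx_freed(idx)))
			return -EINTR;
		return 0;
	}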
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240223075539.89945-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 06d88d2008ba..bea3d5cf8a83 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2468,7 +2468,7 @@ static inline bool ublk_idr_freed(int id) return ptr == NULL; } -static int ublk_ctrl_del_dev(struct ublk_device **p_ub) +static int ublk_ctrl_del_dev(struct ublk_device **p_ub, bool wait) { struct ublk_device *ub = *p_ub; int idx = ub->ub_number; @@ -2502,7 +2502,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) * - the device number is freed already, we will not find this * device via ublk_get_device_from_id() */ - if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx))) + if (wait && wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx))) return -EINTR; return 0; } @@ -2901,7 +2901,10 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_add_dev(cmd); break; case UBLK_CMD_DEL_DEV: - ret = ublk_ctrl_del_dev(&ub); + ret = ublk_ctrl_del_dev(&ub, true); break; + case UBLK_U_CMD_DEL_DEV_ASYNC: + ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: ret = ublk_ctrl_get_queue_affinity(ub, cmd); -- cgit From eabf5dfc2d6048d8415cd22d38d7d3e0bdb4dff9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:44:08 -0800 Subject: pktcdvd: don't set max_hw_sectors on the underlying device pktcdvd sets max_hw_sectors on the queue of the underlying device that it doesn't own (and doesn't reset it ever) since the driver was merged. This can create all kinds of problems as the underlying driver doesn't even know about it changing the limit. As the stated purpose is to not create I/Os larger than a single frame, and pktcdvd never builds bios larger than that, just set REQ_NOMERGE on the bios it submits so that larger I/Os never get built. Note: I don't have packet writing hardware, so this is compile tested only. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229144408.1047967-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 12fcc881b04f..9071c4ebc1b9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -828,6 +828,12 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, */ static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio) { + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + bio->bi_opf |= REQ_NOMERGE; + spin_lock(&pd->iosched.lock); if (bio_data_dir(bio) == READ) bio_list_add(&pd->iosched.read_queue, bio); @@ -2191,11 +2197,6 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) ret = pkt_open_write(pd); if (ret) goto out_putdev; - /* - * Some CDRW drives can not handle writes larger than one packet, - * even if the size is a multiple of the packet size.
- */ - blk_queue_max_hw_sectors(q, pd->settings.size); set_bit(PACKET_WRITABLE, &pd->flags); } else { pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); -- cgit From 7ea201f2cc1da999b9a0a23ea20b64eb2c4719a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:38:44 -0800 Subject: nbd: don't clear discard_sectors in nbd_config_put nbd_config_put currently clears discard_sectors when unusing a device. This is pretty odd behavior and different from the sector size configuration, which is simply left in place and then reconfigured when nbd_set_size is called as part of configuring the device. Change nbd_set_size to clear discard_sectors if discard is not supported so that all the queue limits changes are handled in one place. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229143846.1047223-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9ee9587375fa..384750d5259f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -336,6 +336,8 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (nbd->config->flags & NBD_FLAG_SEND_TRIM) blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); + else + blk_queue_max_discard_sectors(nbd->disk->queue, 0); blk_queue_logical_block_size(nbd->disk->queue, blksize); blk_queue_physical_block_size(nbd->disk->queue, blksize); @@ -1351,7 +1353,6 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; - blk_queue_max_discard_sectors(nbd->disk->queue, 0); mutex_unlock(&nbd->config_lock); nbd_put(nbd); -- cgit From 242a49e5c8784e93a99e4dc4277b28a8ba85eac5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:38:45 -0800 Subject: nbd: freeze the queue for queue limits updates nbd currently updates the logical and physical block sizes as well as the discard_sectors on a live queue. Freeze the queue first to make sure there are no commands in flight that can see torn or inconsistent limits.
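The fix follows the usual freeze bracket: quiesce the queue so no request can observe half-applied limits, update, then unfreeze. A minimal sketch around a hypothetical __foo_set_size() helper (an assumption, standing in for the real update work):

	static int foo_set_size(struct gendisk *disk, loff_t bytesize, loff_t blksize)
	{
		int error;

		blk_mq_freeze_queue(disk->queue);	/* drain in-flight I/O, block new requests */
		error = __foo_set_size(disk, bytesize, blksize);
		blk_mq_unfreeze_queue(disk->queue);
		return error;
	}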
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229143846.1047223-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 384750d5259f..22ee0ed9aa6d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -316,7 +316,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, nsock->sent = 0; } -static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, +static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize) { if (!blksize) @@ -348,6 +348,18 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, return 0; } +static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, + loff_t blksize) +{ + int error; + + blk_mq_freeze_queue(nbd->disk->queue); + error = __nbd_set_size(nbd, bytesize, blksize); + blk_mq_unfreeze_queue(nbd->disk->queue); + + return error; +} + static void nbd_complete_rq(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); -- cgit From 268283244c0f018dec8bf4a9c69ce50684561f46 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Feb 2024 06:38:46 -0800 Subject: nbd: use the atomic queue limits API in nbd_set_size Use queue_limits_start_update / queue_limits_commit_update to update all the limits in one go and with proper sanity checking. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240229143846.1047223-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 22ee0ed9aa6d..9d4ec9273bf9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -319,6 +319,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize) { + struct queue_limits lim; + int error; + if (!blksize) blksize = 1u << NBD_DEF_BLKSIZE_BITS; @@ -334,12 +337,16 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (!nbd->pid) return 0; + lim = queue_limits_start_update(nbd->disk->queue); if (nbd->config->flags & NBD_FLAG_SEND_TRIM) - blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); + lim.max_hw_discard_sectors = UINT_MAX; else - blk_queue_max_discard_sectors(nbd->disk->queue, 0); - blk_queue_logical_block_size(nbd->disk->queue, blksize); - blk_queue_physical_block_size(nbd->disk->queue, blksize); + lim.max_hw_discard_sectors = 0; + lim.logical_block_size = blksize; + lim.physical_block_size = blksize; + error = queue_limits_commit_update(nbd->disk->queue, &lim); + if (error) + return error; if (max_part) set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); -- cgit From aa067325c05dc3a3aac588f40cacf8418f916cee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2024 06:40:35 -0700 Subject: drbd: pass the max_hw_sectors limit to blk_alloc_disk Pass a queue_limits structure with the max_hw_sectors limit to blk_alloc_disk instead of updating the limit on the allocated gendisk. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305134041.137006-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cea1e537fd56..113b441d4d36 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig int id; int vnr = adm_ctx->volume; enum drbd_ret_code err = ERR_NOMEM; + struct queue_limits lim = { + /* + * Setting the max_hw_sectors to an odd value of 8kibyte here. + * This triggers a max_bio_size message upon first attach or + * connect. + */ + .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, + }; device = minor_to_device(minor); if (device) @@ -2708,7 +2716,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - disk = blk_alloc_disk(NULL, NUMA_NO_NODE); + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_no_disk; @@ -2729,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); - /* Setting the max_hw_sectors to an odd value of 8kibyte here - This triggers a max_bio_size message upon first attach or connect */ - blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8); device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) -- cgit From 342d81fde24152adf9747e6e126c8c3179d1a54c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2024 06:40:36 -0700 Subject: drbd: refactor drbd_reconsider_queue_parameters Split out a drbd_max_peer_bio_size helper for the peer I/O size, and condense the various checks to a nested min3(..., max()) instead of using a lot of local variables. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305134041.137006-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 84 ++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43747a1aae43..9135001a8e57 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1189,6 +1189,33 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } +static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) +{ + /* + * We may ignore peer limits if the peer is modern enough. From 8.3.8 + * onwards the peer can use multiple BIOs for a single peer_request. + */ + if (device->state.conn < C_WF_REPORT_PARAMS) + return device->peer_max_bio_size; + + if (first_peer_device(device)->connection->agreed_pro_version < 94) + return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + + /* + * Correct old drbd (up to 8.3.7) if it believes it can do more than + * 32KiB.
+ */ + if (first_peer_device(device)->connection->agreed_pro_version == 94) + return DRBD_MAX_SIZE_H80_PACKET; + + /* + * drbd 8.3.8 onwards, before 8.4.0 + */ + if (first_peer_device(device)->connection->agreed_pro_version < 100) + return DRBD_MAX_BIO_SIZE_P95; + return DRBD_MAX_BIO_SIZE; +} + static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) { q->limits.discard_granularity = granularity; @@ -1303,48 +1330,35 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi fixup_discard_support(device, q); } -void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) +void drbd_reconsider_queue_parameters(struct drbd_device *device, + struct drbd_backing_dev *bdev, struct o_qlim *o) { - unsigned int now, new, local, peer; - - now = queue_max_hw_sectors(device->rq_queue) << 9; - local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ - peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ + unsigned int now = queue_max_hw_sectors(device->rq_queue) << + SECTOR_SHIFT; + unsigned int new; if (bdev) { - local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; - device->local_max_bio_size = local; - } - local = min(local, DRBD_MAX_BIO_SIZE); - - /* We may ignore peer limits if the peer is modern enough. - Because new from 8.3.8 onwards the peer can use multiple - BIOs for a single peer_request */ - if (device->state.conn >= C_WF_REPORT_PARAMS) { - if (first_peer_device(device)->connection->agreed_pro_version < 94) - peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); - /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ - else if (first_peer_device(device)->connection->agreed_pro_version == 94) - peer = DRBD_MAX_SIZE_H80_PACKET; - else if (first_peer_device(device)->connection->agreed_pro_version < 100) - peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ - else - peer = DRBD_MAX_BIO_SIZE; + struct request_queue *b = bdev->backing_bdev->bd_disk->queue; - /* We may later detach and re-attach on a disconnected Primary. - * Avoid this setting to jump back in that case. - * We want to store what we know the peer DRBD can handle, - * not what the peer IO backend can handle. */ - if (peer > device->peer_max_bio_size) - device->peer_max_bio_size = peer; + device->local_max_bio_size = + queue_max_hw_sectors(b) << SECTOR_SHIFT; } - new = min(local, peer); - if (device->state.role == R_PRIMARY && new < now) - drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now); - - if (new != now) + /* + * We may later detach and re-attach on a disconnected Primary. Avoid + * decreasing the value in this case. + * + * We want to store what we know the peer DRBD can handle, not what the + * peer IO backend can handle. 
+ */ + new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size, + max(drbd_max_peer_bio_size(device), device->peer_max_bio_size)); + if (new != now) { + if (device->state.role == R_PRIMARY && new < now) + drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", + new, now); drbd_info(device, "max BIO size = %u\n", new); + } drbd_setup_queue_param(device, bdev, new, o); } -- cgit From 2828908d5cc8396e7c91d04d67e03ed834234bcd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:28 +0100 Subject: drbd: refactor the backing dev max_segments calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out a drbd_backing_dev_max_segments helper that checks the backing device limitation. Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-4-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9135001a8e57..0326b7322ceb 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1295,30 +1295,39 @@ static void fixup_discard_support(struct drbd_device *device, struct request_que } } +/* This is the workaround for "bio would need to, but cannot, be split" */ +static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) +{ + unsigned int max_segments; + + rcu_read_lock(); + max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); + + if (!max_segments) + return BLK_MAX_SEGMENTS; + return max_segments; +} + static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int max_bio_size, struct o_qlim *o) { struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = 0; + unsigned int max_segments = BLK_MAX_SEGMENTS; struct request_queue *b = NULL; - struct disk_conf *dc; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - max_segments = dc->max_bio_bvecs; - rcu_read_unlock(); + max_segments = drbd_backing_dev_max_segments(device); blk_set_stacking_limits(&q->limits); } blk_queue_max_hw_sectors(q, max_hw_sectors); - /* This is the workaround for "bio would need to, but cannot, be split" */ - blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); + blk_queue_max_segments(q, max_segments); blk_queue_segment_boundary(q, PAGE_SIZE-1); decide_on_discard_support(device, bdev); -- cgit From e16344e506314e35b1a5a8ccd7b88f4b1844ebb0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:29 +0100 Subject: drbd: merge drbd_setup_queue_param into drbd_reconsider_queue_parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drbd_setup_queue_param is only called by drbd_reconsider_queue_parameters and there is no really clear boundary of responsibilities between the two. 
Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-5-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 56 +++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0326b7322ceb..0f40fdee0899 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1309,45 +1309,16 @@ static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) return max_segments; } -static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size, struct o_qlim *o) -{ - struct request_queue * const q = device->rq_queue; - unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = BLK_MAX_SEGMENTS; - struct request_queue *b = NULL; - - if (bdev) { - b = bdev->backing_bdev->bd_disk->queue; - - max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - max_segments = drbd_backing_dev_max_segments(device); - - blk_set_stacking_limits(&q->limits); - } - - blk_queue_max_hw_sectors(q, max_hw_sectors); - blk_queue_max_segments(q, max_segments); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - decide_on_discard_support(device, bdev); - - if (b) { - blk_stack_limits(&q->limits, &b->limits, 0); - disk_update_readahead(device->vdisk); - } - fixup_write_zeroes(device, q); - fixup_discard_support(device, q); -} - void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) { - unsigned int now = queue_max_hw_sectors(device->rq_queue) << - SECTOR_SHIFT; + struct request_queue * const q = device->rq_queue; + unsigned int now = queue_max_hw_sectors(q) << 9; + struct request_queue *b = NULL; unsigned int new; if (bdev) { - struct request_queue *b = bdev->backing_bdev->bd_disk->queue; + b = bdev->backing_bdev->bd_disk->queue; device->local_max_bio_size = queue_max_hw_sectors(b) << SECTOR_SHIFT; @@ -1369,7 +1340,24 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, drbd_info(device, "max BIO size = %u\n", new); } - drbd_setup_queue_param(device, bdev, new, o); + if (bdev) { + blk_set_stacking_limits(&q->limits); + blk_queue_max_segments(q, + drbd_backing_dev_max_segments(device)); + } else { + blk_queue_max_segments(q, BLK_MAX_SEGMENTS); + } + + blk_queue_max_hw_sectors(q, new >> SECTOR_SHIFT); + blk_queue_segment_boundary(q, PAGE_SIZE - 1); + decide_on_discard_support(device, bdev); + + if (bdev) { + blk_stack_limits(&q->limits, &b->limits, 0); + disk_update_readahead(device->vdisk); + } + fixup_write_zeroes(device, q); + fixup_discard_support(device, q); } /* Starts the worker thread */ -- cgit From e3992e02c970f6eb803b98b9f733cad40f190161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:30 +0100 Subject: drbd: don't set max_write_zeroes_sectors in decide_on_discard_support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixup_write_zeroes always overrides the max_write_zeroes_sectors value a little further down the callchain, so don't bother to setup a limit in decide_on_discard_support. 
Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-6-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0f40fdee0899..a79b7fe5335d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1260,7 +1260,6 @@ static void decide_on_discard_support(struct drbd_device *device, blk_queue_discard_granularity(q, 512); max_discard_sectors = drbd_max_discard_sectors(connection); blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); return; not_supported: -- cgit From 5eaee6e9c8f9940ecee93678972774fb8dd450d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Mar 2024 15:03:31 +0100 Subject: drbd: split out a drbd_discard_supported helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to check if discard is supported for a given connection / backing device combination. Signed-off-by: Christoph Hellwig Reviewed-by: Philipp Reisner Reviewed-by: Lars Ellenberg Tested-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20240306140332.623759-7-philipp.reisner@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a79b7fe5335d..94ed2b3ea636 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1231,24 +1231,33 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) return AL_EXTENT_SIZE >> 9; } -static void decide_on_discard_support(struct drbd_device *device, +static bool drbd_discard_supported(struct drbd_connection *connection, struct drbd_backing_dev *bdev) { - struct drbd_connection *connection = - first_peer_device(device)->connection; - struct request_queue *q = device->rq_queue; - unsigned int max_discard_sectors; - if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) - goto not_supported; + return false; if (connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); - goto not_supported; + return false; } + return true; +} + +static void decide_on_discard_support(struct drbd_device *device, + struct drbd_backing_dev *bdev) +{ + struct drbd_connection *connection = + first_peer_device(device)->connection; + struct request_queue *q = device->rq_queue; + unsigned int max_discard_sectors; + + if (!drbd_discard_supported(connection, bdev)) + goto not_supported; + /* * We don't care for the granularity, really. * -- cgit From e6dfe748f09e37f77437bd337f891f5b57d5d5a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2024 06:40:41 -0700 Subject: drbd: atomically update queue limits in drbd_reconsider_queue_parameters Switch drbd_reconsider_queue_parameters to set up the queue parameters in an on-stack queue_limits structure and apply them atomically. Remove various helpers that have become so trivial that they can be folded into drbd_reconsider_queue_parameters.
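Before the diff, the live-queue update API in one breath: snapshot the current limits, edit the copy, and commit, with the commit step validating everything and failing atomically. A hedged sketch, assuming an existing request_queue *q; the values are arbitrary:

	struct queue_limits lim;
	int error;

	lim = queue_limits_start_update(q);
	lim.max_hw_sectors = 1024;	/* illustrative values only */
	lim.max_segments = 128;
	error = queue_limits_commit_update(q, &lim);
	if (error)
		pr_err("setting new queue limits failed\n");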
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240305134041.137006-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 119 +++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 73 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 94ed2b3ea636..fbd92803dc1d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1216,11 +1216,6 @@ static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) return DRBD_MAX_BIO_SIZE; } -static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) -{ - q->limits.discard_granularity = granularity; -} - static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) { /* when we introduced REQ_WRITE_SAME support, we also bumped @@ -1247,62 +1242,6 @@ static bool drbd_discard_supported(struct drbd_connection *connection, return true; } -static void decide_on_discard_support(struct drbd_device *device, - struct drbd_backing_dev *bdev) -{ - struct drbd_connection *connection = - first_peer_device(device)->connection; - struct request_queue *q = device->rq_queue; - unsigned int max_discard_sectors; - - if (!drbd_discard_supported(connection, bdev)) - goto not_supported; - - /* - * We don't care for the granularity, really. - * - * Stacking limits below should fix it for the local device. Whether or - * not it is a suitable granularity on the remote device is not our - * problem, really. If you care, you need to use devices with similar - * topology on all peers. - */ - blk_queue_discard_granularity(q, 512); - max_discard_sectors = drbd_max_discard_sectors(connection); - blk_queue_max_discard_sectors(q, max_discard_sectors); - return; - -not_supported: - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); -} - -static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) -{ - /* Fixup max_write_zeroes_sectors after blk_stack_limits(): - * if we can handle "zeroes" efficiently on the protocol, - * we want to do that, even if our backend does not announce - * max_write_zeroes_sectors itself. */ - struct drbd_connection *connection = first_peer_device(device)->connection; - /* If the peer announces WZEROES support, use it. Otherwise, rather - * send explicit zeroes than rely on some discard-zeroes-data magic. 
*/ - if (connection->agreed_features & DRBD_FF_WZEROES) - q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; - else - q->limits.max_write_zeroes_sectors = 0; -} - -static void fixup_discard_support(struct drbd_device *device, struct request_queue *q) -{ - unsigned int max_discard = device->rq_queue->limits.max_discard_sectors; - unsigned int discard_granularity = - device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT; - - if (discard_granularity > max_discard) { - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); - } -} - /* This is the workaround for "bio would need to, but cannot, be split" */ static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) { @@ -1320,8 +1259,11 @@ static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) { + struct drbd_connection *connection = + first_peer_device(device)->connection; struct request_queue * const q = device->rq_queue; unsigned int now = queue_max_hw_sectors(q) << 9; + struct queue_limits lim; struct request_queue *b = NULL; unsigned int new; @@ -1348,24 +1290,55 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, drbd_info(device, "max BIO size = %u\n", new); } + lim = queue_limits_start_update(q); if (bdev) { - blk_set_stacking_limits(&q->limits); - blk_queue_max_segments(q, - drbd_backing_dev_max_segments(device)); + blk_set_stacking_limits(&lim); + lim.max_segments = drbd_backing_dev_max_segments(device); } else { - blk_queue_max_segments(q, BLK_MAX_SEGMENTS); + lim.max_segments = BLK_MAX_SEGMENTS; } - blk_queue_max_hw_sectors(q, new >> SECTOR_SHIFT); - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - decide_on_discard_support(device, bdev); + lim.max_hw_sectors = new >> SECTOR_SHIFT; + lim.seg_boundary_mask = PAGE_SIZE - 1; - if (bdev) { - blk_stack_limits(&q->limits, &b->limits, 0); - disk_update_readahead(device->vdisk); + /* + * We don't care for the granularity, really. + * + * Stacking limits below should fix it for the local device. Whether or + * not it is a suitable granularity on the remote device is not our + * problem, really. If you care, you need to use devices with similar + * topology on all peers. + */ + if (drbd_discard_supported(connection, bdev)) { + lim.discard_granularity = 512; + lim.max_hw_discard_sectors = + drbd_max_discard_sectors(connection); + } else { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; } - fixup_write_zeroes(device, q); - fixup_discard_support(device, q); + + if (bdev) + blk_stack_limits(&lim, &b->limits, 0); + + /* + * If we can handle "zeroes" efficiently on the protocol, we want to do + * that, even if our backend does not announce max_write_zeroes_sectors + * itself. + */ + if (connection->agreed_features & DRBD_FF_WZEROES) + lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; + else + lim.max_write_zeroes_sectors = 0; + + if ((lim.discard_granularity >> SECTOR_SHIFT) > + lim.max_hw_discard_sectors) { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; + } + + if (queue_limits_commit_update(q, &lim)) + drbd_err(device, "setting new queue limits failed\n"); } /* Starts the worker thread */ -- cgit From f98364e926626c678fb4b9004b75cacf92ff0662 Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Tue, 5 Mar 2024 16:20:48 +0800 Subject: aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts This patch is against CVE-2023-6270. 
The description of the CVE is: A flaw was found in the ATA over Ethernet (AoE) driver in the Linux kernel. The aoecmd_cfg_pkts() function improperly updates the refcnt on `struct net_device`, and a use-after-free can be triggered by racing between the free on the struct and the access through the `skbtxq` global queue. This could lead to a denial of service condition or potential code execution. In aoecmd_cfg_pkts(), it always calls dev_put(ifp) when the skb initialization code is finished. But the net_device ifp will still be used later in tx()->dev_queue_xmit() in the kthread, which means that dev_put(ifp) should NOT be called in the success path of the skb initialization code in aoecmd_cfg_pkts(). Otherwise tx() may run into a use-after-free because the net_device is freed. This patch removes the dev_put(ifp) from the success path in aoecmd_cfg_pkts(), and adds dev_put() after the skb xmit in tx(). Link: https://nvd.nist.gov/vuln/detail/CVE-2023-6270 Fixes: 7562f876cd93 ("[NET]: Rework dev_base via list_head (v3)") Signed-off-by: Chun-Yi Lee Link: https://lore.kernel.org/r/20240305082048.25526-1-jlee@suse.com Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 12 ++++++------ drivers/block/aoe/aoenet.c | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d7317425be51..cc9077b588d7 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu rcu_read_lock(); for_each_netdev_rcu(&init_net, ifp) { dev_hold(ifp); - if (!is_aoe_netif(ifp)) - goto cont; + if (!is_aoe_netif(ifp)) { + dev_put(ifp); + continue; + } skb = new_skb(sizeof *h + sizeof *ch); if (skb == NULL) { printk(KERN_INFO "aoe: skb alloc failure\n"); - goto cont; + dev_put(ifp); + continue; } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; @@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu h->major = cpu_to_be16(aoemajor); h->minor = aoeminor; h->cmd = AOECMD_CFG; - -cont: - dev_put(ifp); } rcu_read_unlock(); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index c51ea95bc2ce..923a134fd766 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -63,6 +63,7 @@ tx(int id) __must_hold(&txlock) pr_warn("aoe: packet could not be sent on %s. %s\n", ifp ? ifp->name : "netif", "consider increasing tx_queue_len"); + dev_put(ifp); spin_lock_irq(&txlock); } return 0; -- cgit From 0e46064ebebb90b02c53283106f26600aa38c986 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 2 Mar 2024 04:26:37 +0900 Subject: virtio_blk: Do not use disk_set_max_open/active_zones() In virtblk_read_zoned_limits(), setting a zoned block device's maximum number of open and active zones using the functions disk_set_max_open_zones() and disk_set_max_active_zones() is incorrect as setting the limits for the request queue is now done atomically when the gendisk is created (with blk_mq_alloc_disk()). The value set by the disk_set_max_open/active_zones() functions will be overwritten. Fix this by setting the maximum number of open and active zones directly in the queue_limits structure passed to virtblk_read_zoned_limits.
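Condensed, the fix is a change of destination: values read from the virtio config space go into the queue_limits that blk_mq_alloc_disk() will later consume, not into disk_set_*() calls whose effect the commit of those limits would overwrite. A sketch of the corrected flow, with vdev, v, and lim as in the patch context:

	u32 v;

	virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v);
	lim->max_open_zones = v;	/* consumed when the gendisk is allocated */

	virtio_cread(vdev, struct virtio_blk_config, zoned.max_active_zones, &v);
	lim->max_active_zones = v;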
Fixes: 8b837256560c ("virtio_blk: pass queue_limits to blk_mq_alloc_disk") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240301192639.410183-2-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d8b55874cd59..aa9f86507719 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -732,12 +732,12 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); - disk_set_max_open_zones(vblk->disk, v); + lim->max_open_zones = v; dev_dbg(&vdev->dev, "max open zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, zoned.max_active_zones, &v); - disk_set_max_active_zones(vblk->disk, v); + lim->max_active_zones = v; dev_dbg(&vdev->dev, "max active zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, -- cgit From d8d6608b76b98b7b88795a529d3d910ac9ef05f4 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 8 Mar 2024 09:51:04 +0100 Subject: block/swim: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int, which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However, the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve this, there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually, after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/a00aea8201ea85ae726411bb0fb015ea026ff40a.1709886922.git.u.kleine-koenig@pengutronix.de Signed-off-by: Jens Axboe --- drivers/block/swim.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 16bdf62067d8..6731678f3a41 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -916,7 +916,7 @@ out: return ret; } -static int swim_remove(struct platform_device *dev) +static void swim_remove(struct platform_device *dev) { struct swim_priv *swd = platform_get_drvdata(dev); int drive; @@ -937,13 +937,11 @@ static int swim_remove(struct platform_device *dev) release_mem_region(res->start, resource_size(res)); kfree(swd); - - return 0; } static struct platform_driver swim_driver = { .probe = swim_probe, .remove_new = swim_remove, .driver = { .name = CARDNAME, }, -- cgit
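As a companion to the swim conversion, the general shape of the .remove_new() pattern for any platform driver looks like this hedged sketch (the foo names are hypothetical); once every driver is converted, .remove_new() is slated to be renamed back to .remove():

	static void foo_remove(struct platform_device *pdev)
	{
		struct foo_priv *priv = platform_get_drvdata(pdev);

		foo_teardown(priv);	/* hypothetical cleanup; no error code to return */
	}

	static struct platform_driver foo_driver = {
		.probe		= foo_probe,
		.remove_new	= foo_remove,
		.driver		= {
			.name	= "foo",
		},
	};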