From 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 12 Apr 2018 19:11:58 +0100 Subject: block: do not use interruptible wait anywhere When blk_queue_enter() waits for a queue to unfreeze, or unset the PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI device is resumed asynchronously, i.e. after un-freezing userspace tasks. So that commit exposed the bug as a regression in v4.15. A mysterious SIGBUS (or -EIO) sometimes happened during the time the device was being resumed. Most frequently, there was no kernel log message, and we saw Xorg or Xwayland killed by SIGBUS.[1] [1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 Without this fix, I get an IO error in this test: # dd if=/dev/sda of=/dev/null iflag=direct & \ while killall -SIGUSR1 dd; do sleep 0.1; done & \ echo mem > /sys/power/state ; \ sleep 5; killall dd # stop after 5 seconds The interruptible wait was added to blk_queue_enter in commit 3ef28e83ab15 ("block: generic request_queue reference counting"). Before then, the interruptible wait was only in blk-mq, but I don't think it could ever have been correct. Reviewed-by: Bart Van Assche Cc: stable@vger.kernel.org Signed-off-by: Alan Jenkins Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 806ce2442819..3e77409e5a84 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -915,7 +915,6 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) while (true) { bool success = false; - int ret; rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { @@ -947,14 +946,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) */ smp_rmb(); - ret = wait_event_interruptible(q->mq_freeze_wq, - (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || - blk_queue_dying(q)); + wait_event(q->mq_freeze_wq, + (atomic_read(&q->mq_freeze_depth) == 0 && + (preempt || !blk_queue_preempt_only(q))) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; - if (ret) - return ret; } } -- cgit From 1894e916546df0efec9890a5c9954f4ad281494c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Apr 2018 16:24:29 -0600 Subject: loop: remove cmd->rq member We can always get at the request from the payload, no need to store a pointer to it. Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 36 +++++++++++++++++++----------------- drivers/block/loop.h | 1 - 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c9d04497a415..8b2fde2109fc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -452,9 +452,9 @@ static void lo_complete_rq(struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio && - cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) { - struct bio *bio = cmd->rq->bio; + if (unlikely(req_op(rq) == REQ_OP_READ && cmd->use_aio && + cmd->ret >= 0 && cmd->ret < blk_rq_bytes(rq))) { + struct bio *bio = rq->bio; bio_advance(bio, cmd->ret); zero_fill_bio(bio); @@ -465,11 +465,13 @@ static void lo_complete_rq(struct request *rq) static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); + if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(rq); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) @@ -487,7 +489,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, { struct iov_iter iter; struct bio_vec *bvec; - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); struct bio *bio = rq->bio; struct file *file = lo->lo_backing_file; unsigned int offset; @@ -1702,15 +1704,16 @@ EXPORT_SYMBOL(loop_unregister_transfer); static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { - struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); - struct loop_device *lo = cmd->rq->q->queuedata; + struct request *rq = bd->rq; + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct loop_device *lo = rq->q->queuedata; - blk_mq_start_request(bd->rq); + blk_mq_start_request(rq); if (lo->lo_state != Lo_bound) return BLK_STS_IOERR; - switch (req_op(cmd->rq)) { + switch (req_op(rq)) { case REQ_OP_FLUSH: case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: @@ -1723,8 +1726,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && cmd->rq->bio && cmd->rq->bio->bi_css) { - cmd->css = cmd->rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_css) { + cmd->css = rq->bio->bi_css; css_get(cmd->css); } else #endif @@ -1736,8 +1739,9 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, static void loop_handle_cmd(struct loop_cmd *cmd) { - const bool write = op_is_write(req_op(cmd->rq)); - struct loop_device *lo = cmd->rq->q->queuedata; + struct request *rq = blk_mq_rq_from_pdu(cmd); + const bool write = op_is_write(req_op(rq)); + struct loop_device *lo = rq->q->queuedata; int ret = 0; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { @@ -1745,12 +1749,12 @@ static void loop_handle_cmd(struct loop_cmd *cmd) goto failed; } - ret = do_req_filebacked(lo, cmd->rq); + ret = do_req_filebacked(lo, rq); failed: /* complete non-aio request */ if (!cmd->use_aio || ret) { cmd->ret = ret ? -EIO : 0; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(rq); } } @@ -1767,9 +1771,7 @@ static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->rq = rq; kthread_init_work(&cmd->work, loop_queue_work); - return 0; } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 0f45416e4fcf..b78de9879f4f 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -66,7 +66,6 @@ struct loop_device { struct loop_cmd { struct kthread_work work; - struct request *rq; bool use_aio; /* use AIO interface to handle I/O */ atomic_t ref; /* only for aio */ long ret; -- cgit From f9de14bc7e7aac77fd44bb2f62206eb9e494f0d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Apr 2018 16:25:57 -0600 Subject: loop: handle short DIO reads We ran into an issue with loop and btrfs, where btrfs would complain about checksum errors. It turns out that is because we don't handle short reads at all, we just zero fill the remainder. Worse than that, we don't handle the filling properly, which results in loop trying to advance a single bio by much more than its size, since it doesn't take chaining into account. Handle short reads appropriately, by simply retrying at the new correct offset. End the remainder of the request with EIO, if we get a 0 read. Fixes: bc07c10a3603 ("block: loop: support DIO & AIO") Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8b2fde2109fc..5d4e31655d96 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -451,16 +451,36 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) static void lo_complete_rq(struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + blk_status_t ret = BLK_STS_OK; - if (unlikely(req_op(rq) == REQ_OP_READ && cmd->use_aio && - cmd->ret >= 0 && cmd->ret < blk_rq_bytes(rq))) { - struct bio *bio = rq->bio; - - bio_advance(bio, cmd->ret); - zero_fill_bio(bio); + if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || + req_op(rq) != REQ_OP_READ) { + if (cmd->ret < 0) + ret = BLK_STS_IOERR; + goto end_io; } - blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK); + /* + * Short READ - if we got some data, advance our request and + * retry it. If we got no data, end the rest with EIO. + */ + if (cmd->ret) { + blk_update_request(rq, BLK_STS_OK, cmd->ret); + cmd->ret = 0; + blk_mq_requeue_request(rq, true); + } else { + if (cmd->use_aio) { + struct bio *bio = rq->bio; + + while (bio) { + zero_fill_bio(bio); + bio = bio->bi_next; + } + } + ret = BLK_STS_IOERR; +end_io: + blk_mq_end_request(rq, ret); + } } static void lo_rw_aio_do_completion(struct loop_cmd *cmd) -- cgit From b64576cbf36afa5fabf3b31f62a1994c429ef855 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: m68k/mac: Don't remap SWIM MMIO region For reasons I don't understand, calling ioremap() then iounmap() on the SWIM MMIO region causes a hang on 68030 (but not on 68040). ~# modprobe swim_mod SWIM floppy driver Version 0.2 (2008-10-30) SWIM device not found ! watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [modprobe:285] Modules linked in: swim_mod(+) Format 00 Vector: 0064 PC: 000075aa Status: 2000 Not tainted ORIG_D0: ffffffff D0: d00c0000 A2: 007c2370 A1: 003f810c A0: 00040000 D5: d0096800 D4: d0097e00 D3: 00000001 D2: 00000003 D1: 00000000 Non-Maskable Interrupt Modules linked in: swim_mod(+) PC: [<000075ba>] __iounmap+0x24/0x10e SR: 2000 SP: 007abc48 a2: 007c2370 d0: d00c0000 d1: 000001a0 d2: 00000019 d3: 00000001 d4: d0097e00 d5: d0096800 a0: 00040000 a1: 003f810c Process modprobe (pid: 285, task=007c2370) Frame format=0 Stack from 007abc7c: ffffffed 00000000 006a4060 004712e0 007abca0 000076ea d0080000 00080000 010bb4b8 007abcd8 010ba542 d0096000 00000000 00000000 00000001 010bb59c 00000000 007abf30 010bb4b8 0047760a 0047763c 00477612 00616540 007abcec 0020a91a 00477600 0047760a 010bb4cc 007abd18 002092f2 0047760a 00333b06 007abd5c 00000000 0047760a 010bb4cc 00404f90 004776b8 00000001 007abd38 00209446 010bb4cc 0047760a 010bb4cc 0020938e 0031f8be 00616540 007abd64 Call Trace: [<000076ea>] iounmap+0x46/0x5a [<00080000>] shrink_page_list+0x7f6/0xe06 [<010ba542>] swim_probe+0xe4/0x496 [swim_mod] [<0020a91a>] platform_drv_probe+0x20/0x5e [<002092f2>] driver_probe_device+0x21c/0x2b8 [<00333b06>] mutex_lock+0x0/0x2e [<00209446>] __driver_attach+0xb8/0xce [<0020938e>] __driver_attach+0x0/0xce [<0031f8be>] klist_next+0x0/0xa0 [<00207562>] bus_for_each_dev+0x74/0xba [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<00333b06>] mutex_lock+0x0/0x2e [<00208e44>] driver_attach+0x1a/0x1e [<0020938e>] __driver_attach+0x0/0xce [<00207e26>] bus_add_driver+0x188/0x234 [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<00209894>] driver_register+0x58/0x104 [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<010bd000>] swim_init+0x0/0x2c [swim_mod] [<0020a7be>] __platform_driver_register+0x38/0x3c [<010bd028>] swim_init+0x28/0x2c [swim_mod] [<000020dc>] do_one_initcall+0x38/0x196 [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<00075008>] __free_pages+0x0/0x38 [<000045c0>] mangle_kernel_stack+0x30/0xda [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<003331cc>] mutex_unlock+0x0/0x3e [<00333b06>] mutex_lock+0x0/0x2e [<0005ced4>] do_init_module+0x42/0x266 [<010bd000>] swim_init+0x0/0x2c [swim_mod] [<000344c0>] blocking_notifier_call_chain+0x0/0x20 [<0005eda0>] load_module+0x1a30/0x1e70 [<0000465d>] mangle_kernel_stack+0xcd/0xda [<00331c64>] __generic_copy_from_user+0x0/0x46 [<0033256e>] _cond_resched+0x0/0x32 [<00331b9c>] memset+0x0/0x98 [<0033256e>] _cond_resched+0x0/0x32 [<0005f25c>] SyS_init_module+0x7c/0x112 [<00002000>] _start+0x0/0x8 [<00002000>] _start+0x0/0x8 [<00331c82>] __generic_copy_from_user+0x1e/0x46 [<0005f2b2>] SyS_init_module+0xd2/0x112 [<0000465d>] mangle_kernel_stack+0xcd/0xda [<00002b40>] syscall+0x8/0xc [<0000465d>] mangle_kernel_stack+0xcd/0xda [<0008c00c>] pcpu_balance_workfn+0xb2/0x40e Code: 2200 7419 e4a9 e589 2841 d9fc 0000 1000 <2414> 7203 c282 7602 b681 6600 0096 0242 fe00 0482 0000 0000 e9c0 11c3 ed89 2642 There's no need to call ioremap() for the SWIM address range, as it lies within the usual IO device region at 0x5000 0000, which has already been mapped by head.S. Remove the redundant ioremap() and iounmap() calls to fix the hang. Cc: Laurent Vivier Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 64e066eba72e..92f0cddc597e 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -911,7 +911,7 @@ static int swim_probe(struct platform_device *dev) goto out; } - swim_base = ioremap(res->start, resource_size(res)); + swim_base = (struct swim __iomem *)res->start; if (!swim_base) { ret = -ENOMEM; goto out_release_io; @@ -923,7 +923,7 @@ static int swim_probe(struct platform_device *dev) if (!get_swim_mode(swim_base)) { printk(KERN_INFO "SWIM device not found !\n"); ret = -ENODEV; - goto out_iounmap; + goto out_release_io; } /* set platform driver data */ @@ -931,7 +931,7 @@ static int swim_probe(struct platform_device *dev) swd = kzalloc(sizeof(struct swim_priv), GFP_KERNEL); if (!swd) { ret = -ENOMEM; - goto out_iounmap; + goto out_release_io; } platform_set_drvdata(dev, swd); @@ -945,8 +945,6 @@ static int swim_probe(struct platform_device *dev) out_kfree: kfree(swd); -out_iounmap: - iounmap(swim_base); out_release_io: release_mem_region(res->start, resource_size(res)); out: @@ -974,8 +972,6 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) floppy_eject(&swd->unit[drive]); - iounmap(swd->base); - res = platform_get_resource(dev, IORESOURCE_MEM, 0); if (res) release_mem_region(res->start, resource_size(res)); -- cgit From 7ae6a2b6cc058005ee3d0d2b9ce27688e51afa4b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Fix array bounds check In the floppy_find() function in swim.c is a call to get_disk(swd->unit[drive].disk). The actual parameter to this call can be a NULL pointer when drive == swd->floppy_count. This causes an oops in get_disk(). Data read fault at 0x00000198 in Super Data (pc=0x1be5b6) BAD KERNEL BUSERR Oops: 00000000 Modules linked in: swim_mod ipv6 mac8390 PC: [<001be5b6>] get_disk+0xc/0x76 SR: 2004 SP: 9a078bc1 a2: 0213ed90 d0: 00000000 d1: 00000000 d2: 00000000 d3: 000000ff d4: 00000002 d5: 02983590 a0: 02332e00 a1: 022dfd64 Process dd (pid: 285, task=020ab25b) Frame format=B ssw=074d isc=4a88 isb=6732 daddr=00000198 dobuf=00000000 baddr=001be5bc dibuf=bfffffff ver=f Stack from 022dfca4: 00000000 0203fc00 0213ed90 022dfcc0 02982936 00000000 00200000 022dfd08 0020f85a 00200000 022dfd64 02332e00 004040fc 00000014 001be77e 022dfd64 00334e4a 001be3f8 0800001d 022dfd64 01c04b60 01c04b70 022aba80 029828f8 02332e00 022dfd2c 001be7ac 0203fc00 00200000 022dfd64 02103a00 01c04b60 01c04b60 0200e400 022dfd68 000e191a 00200000 022dfd64 02103a00 0800001d 00000000 00000003 000b89de 00500000 02103a00 01c04b60 02103a08 01c04c2e Call Trace: [<02982936>] floppy_find+0x3e/0x4a [swim_mod] [<00200000>] uart_remove_one_port+0x1a2/0x260 [<0020f85a>] kobj_lookup+0xde/0x132 [<00200000>] uart_remove_one_port+0x1a2/0x260 [<001be77e>] get_gendisk+0x0/0x130 [<00334e4a>] mutex_lock+0x0/0x2e [<001be3f8>] disk_block_events+0x0/0x6c [<029828f8>] floppy_find+0x0/0x4a [swim_mod] [<001be7ac>] get_gendisk+0x2e/0x130 [<00200000>] uart_remove_one_port+0x1a2/0x260 [<000e191a>] __blkdev_get+0x32/0x45a [<00200000>] uart_remove_one_port+0x1a2/0x260 [<000b89de>] complete_walk+0x0/0x8a [<000e1e22>] blkdev_get+0xe0/0x29a [<000e1fdc>] blkdev_open+0x0/0xb0 [<000b89de>] complete_walk+0x0/0x8a [<000e1fdc>] blkdev_open+0x0/0xb0 [<000e01cc>] bd_acquire+0x74/0x8a [<000e205c>] blkdev_open+0x80/0xb0 [<000e1fdc>] blkdev_open+0x0/0xb0 [<000abf24>] do_dentry_open+0x1a4/0x322 [<00020000>] __do_proc_douintvec+0x22/0x27e [<000b89de>] complete_walk+0x0/0x8a [<000baa62>] link_path_walk+0x0/0x48e [<000ba3f8>] inode_permission+0x20/0x54 [<000ac0e4>] vfs_open+0x42/0x78 [<000bc372>] path_openat+0x2b2/0xeaa [<000bc0c0>] path_openat+0x0/0xeaa [<0004463e>] __irq_wake_thread+0x0/0x4e [<0003a45a>] task_tick_fair+0x18/0xc8 [<000bd00a>] do_filp_open+0xa0/0xea [<000abae0>] do_sys_open+0x11a/0x1ee [<00020000>] __do_proc_douintvec+0x22/0x27e [<000abbf4>] SyS_open+0x1e/0x22 [<00020000>] __do_proc_douintvec+0x22/0x27e [<00002b40>] syscall+0x8/0xc [<00020000>] __do_proc_douintvec+0x22/0x27e [<0000c00b>] dyadic+0x1/0x28 Code: 4e5e 4e75 4e56 fffc 2f0b 2f02 266e 0008 <206b> 0198 4a88 6732 2428 002c 661e 486b 0058 4eb9 0032 0b96 588f 4a88 672c 2008 Disabling lock debugging due to kernel taint Fix the array index bounds check to avoid this. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Fixes: 8852ecd97488 ("[PATCH] m68k: mac - Add SWIM floppy support") Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- drivers/block/swim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 92f0cddc597e..2cdfc0db5966 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -795,7 +795,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) struct swim_priv *swd = data; int drive = (*part & 3); - if (drive > swd->floppy_count) + if (drive >= swd->floppy_count) return NULL; *part = 0; -- cgit From c1d6207cc0eef2a7f8551f9c7420d8776268f6e1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Remove extra put_disk() call from error path Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Fixes: 103db8b2dfa5 ("[PATCH] swim: stop sharing request queue across multiple gendisks") Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- drivers/block/swim.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2cdfc0db5966..0258a96e0c46 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -861,7 +861,6 @@ static int swim_floppy_init(struct swim_priv *swd) &swd->lock); if (!swd->unit[drive].disk->queue) { err = -ENOMEM; - put_disk(swd->unit[drive].disk); goto exit_put_disks; } blk_queue_bounce_limit(swd->unit[drive].disk->queue, -- cgit From 8e2ab5a4efaac77fb93e5b5b109d0b3976fdd3a0 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Don't log an error message for an invalid ioctl The 'eject' shell command may send various different ioctl commands. This leads to error messages on the console even though the FDEJECT ioctl succeeds. ~# eject floppy SWIM floppy_ioctl: unknown cmd 21257 SWIM floppy_ioctl: unknown cmd 1 Don't log an error message for an invalid ioctl, just do as the swim3 driver does and return -ENOTTY. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- drivers/block/swim.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 0258a96e0c46..7b847170cf71 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -727,14 +727,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode, if (copy_to_user((void __user *) param, (void *) &floppy_type, sizeof(struct floppy_struct))) return -EFAULT; - break; - - default: - printk(KERN_DEBUG "SWIM floppy_ioctl: unknown cmd %d\n", - cmd); - return -ENOSYS; + return 0; } - return 0; + return -ENOTTY; } static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) -- cgit From 56a1c5ee54f69dd767fb61d301883dc919ddc259 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Rename macros to avoid inconsistent inverted logic The Sony drive status bits use active-low logic. The swim_readbit() function converts that to 'C' logic for readability. Hence, the sense of the names of the status bit macros should not be inverted. Mostly they are correct. However, the TWOMEG_DRIVE, MFM_MODE and TWOMEG_MEDIA macros have inverted sense (like MkLinux). Fix this inconsistency and make the following patches less confusing. The same problem affects swim3.c so fix that too. No functional change. The FDHD drive status bits are documented in sonydriv.cpp from MAME and in swimiii.h from MkLinux. Cc: Laurent Vivier Cc: Benjamin Herrenschmidt Cc: linuxppc-dev@lists.ozlabs.org Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 8 ++++---- drivers/block/swim3.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 7b847170cf71..d1ee4670666a 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -110,7 +110,7 @@ struct iwm { /* Select values for swim_select and swim_readbit */ #define READ_DATA_0 0x074 -#define TWOMEG_DRIVE 0x075 +#define ONEMEG_DRIVE 0x075 #define SINGLE_SIDED 0x076 #define DRIVE_PRESENT 0x077 #define DISK_IN 0x170 @@ -118,9 +118,9 @@ struct iwm { #define TRACK_ZERO 0x172 #define TACHO 0x173 #define READ_DATA_1 0x174 -#define MFM_MODE 0x175 +#define GCR_MODE 0x175 #define SEEK_COMPLETE 0x176 -#define ONEMEG_MEDIA 0x177 +#define TWOMEG_MEDIA 0x177 /* Bits in handshake register */ @@ -612,7 +612,7 @@ static void setup_medium(struct floppy_state *fs) struct floppy_struct *g; fs->disk_in = 1; fs->write_protected = swim_readbit(base, WRITE_PROT); - fs->type = swim_readbit(base, ONEMEG_MEDIA); + fs->type = swim_readbit(base, TWOMEG_MEDIA); if (swim_track00(base)) printk(KERN_ERR diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index af51015d056e..469541c1e51e 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -148,7 +148,7 @@ struct swim3 { #define MOTOR_ON 2 #define RELAX 3 /* also eject in progress */ #define READ_DATA_0 4 -#define TWOMEG_DRIVE 5 +#define ONEMEG_DRIVE 5 #define SINGLE_SIDED 6 /* drive or diskette is 4MB type? */ #define DRIVE_PRESENT 7 #define DISK_IN 8 @@ -156,9 +156,9 @@ struct swim3 { #define TRACK_ZERO 10 #define TACHO 11 #define READ_DATA_1 12 -#define MFM_MODE 13 +#define GCR_MODE 13 #define SEEK_COMPLETE 14 -#define ONEMEG_MEDIA 15 +#define TWOMEG_MEDIA 15 /* Definitions of values used in writing and formatting */ #define DATA_ESCAPE 0x99 -- cgit From 8a500df63d07d8aee44b7ee2c54e462e47ce93ec Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Check drive type The SWIM chip is compatible with GCR-mode Sony 400K/800K drives but this driver only supports MFM mode. Therefore only Sony FDHD drives are supported. Skip incompatible drives. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index d1ee4670666a..c8c8b9da3edd 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -829,10 +829,12 @@ static int swim_floppy_init(struct swim_priv *swd) /* scan floppy drives */ swim_drive(base, INTERNAL_DRIVE); - if (swim_readbit(base, DRIVE_PRESENT)) + if (swim_readbit(base, DRIVE_PRESENT) && + !swim_readbit(base, ONEMEG_DRIVE)) swim_add_floppy(swd, INTERNAL_DRIVE); swim_drive(base, EXTERNAL_DRIVE); - if (swim_readbit(base, DRIVE_PRESENT)) + if (swim_readbit(base, DRIVE_PRESENT) && + !swim_readbit(base, ONEMEG_DRIVE)) swim_add_floppy(swd, EXTERNAL_DRIVE); /* register floppy drives */ -- cgit From 5a13388d7aa1177b98d7168330ecbeeac52f844d Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Fix IO error at end of medium Reading to the end of a 720K disk results in an IO error instead of EOF because the block layer thinks the disk has 2880 sectors. (Partly this is a result of inverted logic of the ONEMEG_MEDIA bit that's now fixed.) Initialize the density and head count in swim_add_floppy() to agree with the device size passed to set_capacity() during drive probe. Call set_capacity() again upon device open, after refreshing the density and head count values. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index c8c8b9da3edd..2c75761b61e8 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -612,7 +612,6 @@ static void setup_medium(struct floppy_state *fs) struct floppy_struct *g; fs->disk_in = 1; fs->write_protected = swim_readbit(base, WRITE_PROT); - fs->type = swim_readbit(base, TWOMEG_MEDIA); if (swim_track00(base)) printk(KERN_ERR @@ -620,6 +619,9 @@ static void setup_medium(struct floppy_state *fs) swim_track00(base); + fs->type = swim_readbit(base, TWOMEG_MEDIA) ? + HD_MEDIA : DD_MEDIA; + fs->head_number = swim_readbit(base, SINGLE_SIDED) ? 1 : 2; get_floppy_geometry(fs, 0, &g); fs->total_secs = g->size; fs->secpercyl = g->head * g->sect; @@ -656,6 +658,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) goto out; } + set_capacity(fs->disk, fs->total_secs); + if (mode & FMODE_NDELAY) return 0; @@ -808,10 +812,9 @@ static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) swim_motor(base, OFF); - if (swim_readbit(base, SINGLE_SIDED)) - fs->head_number = 1; - else - fs->head_number = 2; + fs->type = HD_MEDIA; + fs->head_number = 2; + fs->ref_count = 0; fs->ejected = 1; -- cgit From b3906535ccc6cd04c42f9b1c7e31d1947b3ebc74 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 11 Apr 2018 20:50:14 -0400 Subject: block/swim: Select appropriate drive on device open The driver supports internal and external FDD units so the floppy_open function must not hard-code the drive location. Cc: Laurent Vivier Cc: Jens Axboe Cc: stable@vger.kernel.org # v4.14+ Tested-by: Stan Johnson Signed-off-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Jens Axboe --- drivers/block/swim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 2c75761b61e8..0e31884a9519 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -648,7 +648,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) swim_write(base, setup, S_IBM_DRIVE | S_FCLK_DIV2); udelay(10); - swim_drive(base, INTERNAL_DRIVE); + swim_drive(base, fs->location); swim_motor(base, ON); swim_action(base, SETMFM); if (fs->ejected) -- cgit From f4560231ec42092c6662acccabb28c6cac9f5dfb Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Tue, 17 Apr 2018 11:46:20 +0800 Subject: blk-mq: start request gstate with gen 1 rq->gstate and rq->aborted_gstate both are zero before rqs are allocated. If we have a small timeout, when the timer fires, there could be rqs that are never allocated, and also there could be rq that has been allocated but not initialized and started. At the moment, the rq->gstate and rq->aborted_gstate both are 0, thus the blk_mq_terminate_expired will identify the rq is timed out and invoke .timeout early. For scsi, this will cause scsi_times_out to be invoked before the scsi_cmnd is not initialized, scsi_cmnd->device is still NULL at the moment, then we will get crash. Cc: Bart Van Assche Cc: Tejun Heo Cc: Ming Lei Cc: Martin Steigerwald Cc: stable@vger.kernel.org Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++++ block/blk-mq.c | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 3e77409e5a84..85909b431eb0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -201,6 +201,10 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->part = NULL; seqcount_init(&rq->gstate_seq); u64_stats_init(&rq->aborted_gstate_sync); + /* + * See comment of blk_mq_init_request + */ + WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); } EXPORT_SYMBOL(blk_rq_init); diff --git a/block/blk-mq.c b/block/blk-mq.c index 0dc9e341c2a7..e4aa36817367 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2042,6 +2042,13 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, seqcount_init(&rq->gstate_seq); u64_stats_init(&rq->aborted_gstate_sync); + /* + * start gstate with gen 1 instead of 0, otherwise it will be equal + * to aborted_gstate, and be identified timed out by + * blk_mq_terminate_expired. + */ + WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); + return 0; } -- cgit From 72961c4e6082be79825265d9193272b8a1634dec Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Apr 2018 17:08:52 -0600 Subject: bfq-iosched: ensure to clear bic/bfqq pointers when preparing request Even if we don't have an IO context attached to a request, we still need to clear the priv[0..1] pointers, as they could be pointing to previously used bic/bfqq structures. If we don't do so, we'll either corrupt memory on dispatching a request, or cause an imbalance in counters. Inspired by a fix from Kees. Reported-by: Oleksandr Natalenko Reported-by: Kees Cook Cc: stable@vger.kernel.org Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler") Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f0ecd98509d8..771ae9730ac6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4934,8 +4934,16 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) bool new_queue = false; bool bfqq_already_existing = false, split = false; - if (!rq->elv.icq) + /* + * Even if we don't have an icq attached, we should still clear + * the scheduler pointers, as they might point to previously + * allocated bic/bfqq structs. + */ + if (!rq->elv.icq) { + rq->elv.priv[0] = rq->elv.priv[1] = NULL; return; + } + bic = icq_to_bic(rq->elv.icq); spin_lock_irq(&bfqd->lock); -- cgit From 9de4ee40547fd315d4a0ed1dd15a2fa3559ad707 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Apr 2018 12:51:31 +0300 Subject: cdrom: information leak in cdrom_ioctl_media_changed() This cast is wrong. "cdi->capacity" is an int and "arg" is an unsigned long. The way the check is written now, if one of the high 32 bits is set then we could read outside the info->slots[] array. This bug is pretty old and it predates git. Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 8327478effd0..bfc566d3f31a 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2371,7 +2371,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || arg == CDSL_CURRENT) return media_changed(cdi, 1); - if ((unsigned int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; info = kmalloc(sizeof(*info), GFP_KERNEL); -- cgit From 0ce9144471de9ee09306ca0127e7cd27521ccc3f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 18 Apr 2018 14:08:27 +1000 Subject: block: add blk_queue_fua() helper function So we can check FUA support status from the iomap direct IO code. Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..c362aadfe036 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -737,6 +737,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) #define blk_queue_preempt_only(q) \ test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) +#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) extern int blk_set_preempt_only(struct request_queue *q); extern void blk_clear_preempt_only(struct request_queue *q); -- cgit From 946b81da114b8ba5c74bb01e57c0c6eca2bdc801 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 18 Apr 2018 08:37:18 -0600 Subject: blkcg: don't hold blkcg lock when deactivating policy As described in the comment of blkcg_activate_policy(), *Update of each blkg is protected by both queue and blkcg locks so that holding either lock and testing blkcg_policy_enabled() is always enough for dereferencing policy data.* with queue lock held, there is no need to hold blkcg lock in blkcg_deactivate_policy(). Similar case is in blkcg_activate_policy(), which has removed holding of blkcg lock in commit 4c55f4f9ad3001ac1fefdd8d8ca7641d18558e23. Signed-off-by: Jiang Biao Signed-off-by: Wen Yang CC: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1c16694ae145..21bc449d01c0 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1410,9 +1410,6 @@ void blkcg_deactivate_policy(struct request_queue *q, __clear_bit(pol->plid, q->blkcg_pols); list_for_each_entry(blkg, &q->blkg_list, q_node) { - /* grab blkcg lock too while removing @pd from @blkg */ - spin_lock(&blkg->blkcg->lock); - if (blkg->pd[pol->plid]) { if (!blkg->pd[pol->plid]->offline && pol->pd_offline_fn) { @@ -1422,8 +1419,6 @@ void blkcg_deactivate_policy(struct request_queue *q, pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } - - spin_unlock(&blkg->blkcg->lock); } spin_unlock_irq(q->queue_lock); -- cgit From bea548831b8cee347181132eacd8b9711dfced92 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Thu, 19 Apr 2018 12:04:26 +0800 Subject: blkcg: small fix on comment in blkcg_init_queue The comment before blkg_create() in blkcg_init_queue() was moved from blkcg_activate_policy() by commit ec13b1d6f0a0457312e615, but it does not suit for the new context. Signed-off-by: Jiang Biao Signed-off-by: Wen Yang CC: Tejun Heo CC: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 21bc449d01c0..79da2a723b68 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1177,11 +1177,7 @@ int blkcg_init_queue(struct request_queue *q) preloaded = !radix_tree_preload(GFP_KERNEL); - /* - * Make sure the root blkg exists and count the existing blkgs. As - * @q is bypassing at this point, blkg_lookup_create() can't be - * used. Open code insertion. - */ + /* Make sure the root blkg exists. */ rcu_read_lock(); spin_lock_irq(q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); -- cgit From 901932a3f9b2b80352896be946c6d577c0a9652c Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Thu, 19 Apr 2018 12:06:09 +0800 Subject: blkcg: init root blkcg_gq under lock The initializing of q->root_blkg is currently outside of queue lock and rcu, so the blkg may be destroied before the initializing, which may cause dangling/null references. On the other side, the destroys of blkg are protected by queue lock or rcu. Put the initializing inside the queue lock and rcu to make it safer. Signed-off-by: Jiang Biao Signed-off-by: Wen Yang CC: Tejun Heo CC: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 79da2a723b68..eb85cb87c40f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1181,18 +1181,16 @@ int blkcg_init_queue(struct request_queue *q) rcu_read_lock(); spin_lock_irq(q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); + if (IS_ERR(blkg)) + goto err_unlock; + q->root_blkg = blkg; + q->root_rl.blkg = blkg; spin_unlock_irq(q->queue_lock); rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); - if (IS_ERR(blkg)) - return PTR_ERR(blkg); - - q->root_blkg = blkg; - q->root_rl.blkg = blkg; - ret = blk_throtl_init(q); if (ret) { spin_lock_irq(q->queue_lock); @@ -1200,6 +1198,13 @@ int blkcg_init_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); } return ret; + +err_unlock: + spin_unlock_irq(q->queue_lock); + rcu_read_unlock(); + if (preloaded) + radix_tree_preload_end(); + return PTR_ERR(blkg); } /** -- cgit From d88b6d04447bbfd0b55f57afb231bf6f5b38c8f0 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Thu, 19 Apr 2018 10:59:31 -0700 Subject: MAINTAINERS: Remove me as maintainer of bcache Too much to do with other projects. I've enjoyed working with everyone here, and hope to occasionally contribute on bcache. Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..f0066efc121d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2614,7 +2614,6 @@ S: Maintained F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) -M: Michael Lyle M: Kent Overstreet L: linux-bcache@vger.kernel.org W: http://bcache.evilpiepirate.org -- cgit From e9938f552fc279e87afcbff430cd6f8966ef31b3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Apr 2018 21:07:14 -0600 Subject: bcache: mark Coly Li as bcache maintainer Since Michael had to step back, Coly has agreed to be the new maintainer. Mark him as such. Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f0066efc121d..d7547101f2c7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2614,6 +2614,7 @@ S: Maintained F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) +M: Coly Li M: Kent Overstreet L: linux-bcache@vger.kernel.org W: http://bcache.evilpiepirate.org -- cgit From fe644072dfee069d97a66ea9a80f4bc461499e6a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 20 Apr 2018 10:29:51 +0200 Subject: block: mq: Add some minor doc for core structs As it came up in discussion on the mailing list that the semantic meaning of 'blk_mq_ctx' and 'blk_mq_hw_ctx' isn't completely obvious to everyone, let's add some minimal kerneldoc for a starter. Signed-off-by: Linus Walleij Signed-off-by: Jens Axboe --- block/blk-mq.h | 3 +++ include/linux/blk-mq.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/block/blk-mq.h b/block/blk-mq.h index 88c558f71819..89b5cd3a6c70 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -7,6 +7,9 @@ struct blk_mq_tag_set; +/** + * struct blk_mq_ctx - State for a software queue facing the submitting CPUs + */ struct blk_mq_ctx { struct { spinlock_t lock; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e3986f4b3461..ebc34a5686dc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,9 @@ struct blk_mq_tags; struct blk_flush_queue; +/** + * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device + */ struct blk_mq_hw_ctx { struct { spinlock_t lock; -- cgit From 4412efecf7fda3b8f9f18feed7938f2281f5ccbc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 25 Apr 2018 04:01:44 +0800 Subject: Revert "blk-mq: remove code for dealing with remapping queue" This reverts commit 37c7c6c76d431dd7ef9c29d95f6052bd425f004c. Turns out some drivers(most are FC drivers) may not use managed IRQ affinity, and has their customized .map_queues meantime, so still keep this code for avoiding regression. Reported-by: Laurence Oberman Tested-by: Laurence Oberman Tested-by: Christian Borntraeger Tested-by: Stefan Haberland Cc: Ewan Milne Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e4aa36817367..c3621453ad87 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2336,7 +2336,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, static void blk_mq_map_swqueue(struct request_queue *q) { - unsigned int i; + unsigned int i, hctx_idx; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; @@ -2353,8 +2353,23 @@ static void blk_mq_map_swqueue(struct request_queue *q) /* * Map software to hardware queues. + * + * If the cpu isn't present, the cpu is mapped to first hctx. */ for_each_possible_cpu(i) { + hctx_idx = q->mq_map[i]; + /* unmapped hw queue can be remapped after CPU topo changed */ + if (!set->tags[hctx_idx] && + !__blk_mq_alloc_rq_map(set, hctx_idx)) { + /* + * If tags initialization fail for some hctx, + * that hctx won't be brought online. In this + * case, remap the current ctx to hctx[0] which + * is guaranteed to always have tags allocated + */ + q->mq_map[i] = 0; + } + ctx = per_cpu_ptr(q->queue_ctx, i); hctx = blk_mq_map_queue(q, i); @@ -2366,8 +2381,21 @@ static void blk_mq_map_swqueue(struct request_queue *q) mutex_unlock(&q->sysfs_lock); queue_for_each_hw_ctx(q, hctx, i) { - /* every hctx should get mapped by at least one CPU */ - WARN_ON(!hctx->nr_ctx); + /* + * If no software queues are mapped to this hardware queue, + * disable it and free the request entries. + */ + if (!hctx->nr_ctx) { + /* Never unmap queue 0. We need it as a + * fallback in case of a new remap fails + * allocation + */ + if (i && set->tags[i]) + blk_mq_free_map_and_requests(set, i); + + hctx->tags = NULL; + continue; + } hctx->tags = set->tags[i]; WARN_ON(!hctx->tags); -- cgit