Diffstat (limited to 'block/blk-core.c')
-rw-r--r--	block/blk-core.c	133
1 file changed, 82 insertions, 51 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 82c3ae22d76d..8387fe50ea15 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -94,20 +94,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_queue_flag_clear);
 
-/**
- * blk_queue_flag_test_and_set - atomically test and set a queue flag
- * @flag: flag to be set
- * @q: request queue
- *
- * Returns the previous value of @flag - 0 if the flag was not set and 1 if
- * the flag was already set.
- */
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
-{
-	return test_and_set_bit(flag, &q->queue_flags);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
-
 #define REQ_OP_NAME(name) [REQ_OP_##name] = #name
 static const char *const blk_op_name[] = {
 	REQ_OP_NAME(READ),
@@ -174,6 +160,8 @@ static const struct {
 	/* Command duration limit device-side timeout */
 	[BLK_STS_DURATION_LIMIT]	= { -ETIME, "duration limit exceeded" },
 
+	[BLK_STS_INVAL]		= { -EINVAL,	"invalid" },
+
 	/* everything else not covered above: */
 	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
 };
@@ -231,7 +219,7 @@ EXPORT_SYMBOL_GPL(blk_status_to_str);
  */
 void blk_sync_queue(struct request_queue *q)
 {
-	del_timer_sync(&q->timeout);
+	timer_delete_sync(&q->timeout);
 	cancel_work_sync(&q->timeout_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
@@ -273,6 +261,8 @@ static void blk_free_queue(struct request_queue *q)
 		blk_mq_release(q);
 
 	ida_free(&blk_queue_ida, q->id);
+	lockdep_unregister_key(&q->io_lock_cls_key);
+	lockdep_unregister_key(&q->q_lock_cls_key);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
@@ -290,18 +280,20 @@ void blk_put_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_put_queue);
 
-void blk_queue_start_drain(struct request_queue *q)
+bool blk_queue_start_drain(struct request_queue *q)
 {
 	/*
 	 * When queue DYING flag is set, we need to block new req
 	 * entering queue, so we call blk_freeze_queue_start() to
 	 * prevent I/O from crossing blk_queue_enter().
 	 */
-	blk_freeze_queue_start(q);
+	bool freeze = __blk_freeze_queue_start(q, current);
 	if (queue_is_mq(q))
 		blk_mq_wake_waiters(q);
 	/* Make blk_queue_enter() reexamine the DYING flag. */
 	wake_up_all(&q->mq_freeze_wq);
+
+	return freeze;
 }
 
 /**
@@ -333,6 +325,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 			return -ENODEV;
 	}
 
+	rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_);
+	rwsem_release(&q->q_lockdep_map, _RET_IP_);
 	return 0;
 }
 
@@ -364,6 +358,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
 			goto dead;
 	}
 
+	rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+	rwsem_release(&q->io_lockdep_map, _RET_IP_);
 	return 0;
 dead:
 	bio_io_error(bio);
@@ -385,7 +381,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 
 static void blk_rq_timed_out_timer(struct timer_list *t)
 {
-	struct request_queue *q = from_timer(q, t, timeout);
+	struct request_queue *q = timer_container_of(q, t, timeout);
 
 	kblockd_schedule_work(&q->timeout_work);
 }
@@ -433,8 +429,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 
 	refcount_set(&q->refs, 1);
 	mutex_init(&q->debugfs_mutex);
+	mutex_init(&q->elevator_lock);
 	mutex_init(&q->sysfs_lock);
-	mutex_init(&q->sysfs_dir_lock);
 	mutex_init(&q->limits_lock);
 	mutex_init(&q->rq_qos_mutex);
 	spin_lock_init(&q->queue_lock);
@@ -453,6 +449,18 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (error)
 		goto fail_stats;
+	lockdep_register_key(&q->io_lock_cls_key);
+	lockdep_register_key(&q->q_lock_cls_key);
+	lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)",
+			 &q->io_lock_cls_key, 0);
+	lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)",
+			 &q->q_lock_cls_key, 0);
+
+	/* Teach lockdep about lock ordering (reclaim WRT queue freeze lock). */
+	fs_reclaim_acquire(GFP_KERNEL);
+	rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+	rwsem_release(&q->io_lockdep_map, _RET_IP_);
+	fs_reclaim_release(GFP_KERNEL);
 
 	q->nr_requests = BLKDEV_DEFAULT_RQ;
 
@@ -531,7 +539,7 @@ static inline void bio_check_ro(struct bio *bio)
 	}
 }
 
-static noinline int should_fail_bio(struct bio *bio)
+int should_fail_bio(struct bio *bio)
 {
 	if (should_fail_request(bdev_whole(bio->bi_bdev), bio->bi_iter.bi_size))
 		return -EIO;
@@ -552,6 +560,8 @@ static inline int bio_check_eod(struct bio *bio)
 	if (nr_sectors &&
 	    (nr_sectors > maxsector ||
 	     bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
+		if (!maxsector)
+			return -EIO;
 		pr_info_ratelimited("%s: attempt to access beyond end of device\n"
 				    "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
 				    current->comm, bio->bi_bdev, bio->bi_opf,
@@ -605,7 +615,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 		return BLK_STS_IOERR;
 
 	/* Make sure the BIO is small enough and will not get split */
-	if (nr_sectors > queue_max_zone_append_sectors(q))
+	if (nr_sectors > q->limits.max_zone_append_sectors)
 		return BLK_STS_IOERR;
 
 	bio->bi_opf |= REQ_NOMERGE;
@@ -627,8 +637,14 @@ static void __submit_bio(struct bio *bio)
 		blk_mq_submit_bio(bio);
 	} else if (likely(bio_queue_enter(bio) == 0)) {
 		struct gendisk *disk = bio->bi_bdev->bd_disk;
-
-		disk->fops->submit_bio(bio);
+
+		if ((bio->bi_opf & REQ_POLLED) &&
+		    !(disk->queue->limits.features & BLK_FEAT_POLL)) {
+			bio->bi_status = BLK_STS_NOTSUPP;
+			bio_endio(bio);
+		} else {
+			disk->fops->submit_bio(bio);
+		}
 		blk_queue_exit(disk->queue);
 	}
 
@@ -646,13 +662,13 @@ static void __submit_bio(struct bio *bio)
  * bio_list of new bios to be added.  ->submit_bio() may indeed add some more
  * bios through a recursive call to submit_bio_noacct.  If it did, we find a
  * non-NULL value in bio_list and re-enter the loop from the top.
- * - In this case we really did just take the bio of the top of the list (no
+ * - In this case we really did just take the bio off the top of the list (no
  *   pretending) and so remove it from bio_list, and call into ->submit_bio()
  *   again.
  *
 * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
 * bio_list_on_stack[1] contains bios that were submitted before the current
- *	->submit_bio, but that haven't been processed yet.
+ *	->submit_bio(), but that haven't been processed yet.
 */
 static void __submit_bio_noacct(struct bio *bio)
 {
@@ -711,10 +727,9 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 	current->bio_list = NULL;
 }
 
-void submit_bio_noacct_nocheck(struct bio *bio)
+void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 {
 	blk_cgroup_bio_start(bio);
-	blkcg_bio_issue_init(bio);
 
 	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
 		trace_block_bio_queue(bio);
@@ -728,15 +743,31 @@ void submit_bio_noacct_nocheck(struct bio *bio)
 	/*
 	 * We only want one ->submit_bio to be active at a time, else stack
 	 * usage with stacked devices could be a problem.  Use current->bio_list
-	 * to collect a list of requests submited by a ->submit_bio method while
-	 * it is active, and then process them after it returned.
+	 * to collect a list of requests submitted by a ->submit_bio method
+	 * while it is active, and then process them after it returned.
 	 */
-	if (current->bio_list)
-		bio_list_add(&current->bio_list[0], bio);
-	else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO))
+	if (current->bio_list) {
+		if (split)
+			bio_list_add_head(&current->bio_list[0], bio);
+		else
+			bio_list_add(&current->bio_list[0], bio);
+	} else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
 		__submit_bio_noacct_mq(bio);
-	else
+	} else {
 		__submit_bio_noacct(bio);
+	}
+}
+
+static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
+						struct bio *bio)
+{
+	if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
+		return BLK_STS_INVAL;
+
+	if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
+		return BLK_STS_INVAL;
+
+	return BLK_STS_OK;
 }
 
 /**
@@ -782,7 +813,7 @@ void submit_bio_noacct(struct bio *bio)
 		if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE &&
 				 bio_op(bio) != REQ_OP_ZONE_APPEND))
 			goto end_io;
-		if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+		if (!bdev_write_cache(bdev)) {
 			bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
 			if (!bio_sectors(bio)) {
 				status = BLK_STS_OK;
@@ -791,12 +822,15 @@ void submit_bio_noacct(struct bio *bio)
 		}
 	}
 
-	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-		bio_clear_polled(bio);
-
 	switch (bio_op(bio)) {
 	case REQ_OP_READ:
+		break;
 	case REQ_OP_WRITE:
+		if (bio->bi_opf & REQ_ATOMIC) {
+			status = blk_validate_atomic_write_op_size(q, bio);
+			if (status != BLK_STS_OK)
+				goto end_io;
+		}
 		break;
 	case REQ_OP_FLUSH:
 		/*
@@ -825,11 +859,8 @@ void submit_bio_noacct(struct bio *bio)
 	case REQ_OP_ZONE_OPEN:
 	case REQ_OP_ZONE_CLOSE:
 	case REQ_OP_ZONE_FINISH:
-		if (!bdev_is_zoned(bio->bi_bdev))
-			goto not_supported;
-		break;
 	case REQ_OP_ZONE_RESET_ALL:
-		if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+		if (!bdev_is_zoned(bio->bi_bdev))
 			goto not_supported;
 		break;
 	case REQ_OP_DRV_IN:
@@ -845,7 +876,7 @@ void submit_bio_noacct(struct bio *bio)
 
 	if (blk_throtl_bio(bio))
 		return;
-	submit_bio_noacct_nocheck(bio);
+	submit_bio_noacct_nocheck(bio, false);
 	return;
 
 not_supported:
@@ -870,7 +901,7 @@ static void bio_set_ioprio(struct bio *bio)
  *
  * submit_bio() is used to submit I/O requests to block devices.  It is passed a
  * fully set up &struct bio that describes the I/O that needs to be done.  The
- * bio will be send to the device described by the bi_bdev field.
+ * bio will be sent to the device described by the bi_bdev field.
 *
 * The success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the ->bi_end_io() callback
@@ -915,8 +946,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 		return 0;
 
 	q = bdev_get_queue(bdev);
-	if (cookie == BLK_QC_T_NONE ||
-	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+	if (cookie == BLK_QC_T_NONE)
 		return 0;
 
 	blk_flush_plug(current->plug, false);
@@ -937,7 +967,8 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 	} else {
 		struct gendisk *disk = q->disk;
 
-		if (disk && disk->fops->poll_bio)
+		if ((q->limits.features & BLK_FEAT_POLL) && disk &&
+		    disk->fops->poll_bio)
 			ret = disk->fops->poll_bio(bio, iob, flags);
 	}
 	blk_queue_exit(q);
@@ -960,7 +991,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
 	 * point to a freshly allocated bio at this point.  If that happens
 	 * we have a few cases to consider:
 	 *
-	 *  1) the bio is beeing initialized and bi_bdev is NULL.  We can just
+	 *  1) the bio is being initialized and bi_bdev is NULL.  We can just
 	 *     simply nothing in this case
 	 *  2) the bio points to a not poll enabled device.  bio_poll will catch
 	 *     this and return 0
@@ -992,7 +1023,7 @@ again:
 	stamp = READ_ONCE(part->bd_stamp);
 	if (unlikely(time_after(now, stamp)) &&
 	    likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
-	    (end || part_in_flight(part)))
+	    (end || bdev_count_inflight(part)))
 		__part_stat_add(part, io_ticks, now - stamp);
 
 	if (bdev_is_partition(part)) {
@@ -1101,8 +1132,8 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
 		return;
 
 	plug->cur_ktime = 0;
-	plug->mq_list = NULL;
-	plug->cached_rq = NULL;
+	rq_list_init(&plug->mq_list);
+	rq_list_init(&plug->cached_rqs);
 	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
@@ -1198,7 +1229,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 	 * queue for cached requests, we don't want a blocked task holding
 	 * up a queue freeze/quiesce event.
 	 */
-	if (unlikely(!rq_list_empty(plug->cached_rq)))
+	if (unlikely(!rq_list_empty(&plug->cached_rqs)))
		blk_mq_free_plug_rqs(plug);
 
 	plug->cur_ktime = 0;
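Note on the REQ_ATOMIC hunks above: the new blk_validate_atomic_write_op_size() helper fails a bio with BLK_STS_INVAL when its size exceeds the queue's atomic write unit maximum or is not a multiple of the unit minimum. Below is a minimal userspace sketch of that rule, with the queue limits passed as plain integers; atomic_write_size_ok and its parameter names are illustrative only, not kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Returns true when an atomic write of bio_bytes satisfies the stated limits. */
static bool atomic_write_size_ok(unsigned int bio_bytes,
                                 unsigned int unit_min_bytes,
                                 unsigned int unit_max_bytes)
{
        if (bio_bytes > unit_max_bytes)
                return false;   /* larger than the device can complete atomically */
        if (bio_bytes % unit_min_bytes)
                return false;   /* not a multiple of the minimum atomic unit */
        return true;
}

int main(void)
{
        /* Example limits: 512-byte minimum unit, 64 KiB maximum unit. */
        printf("%d\n", atomic_write_size_ok(4096, 512, 65536));   /* 1: allowed */
        printf("%d\n", atomic_write_size_ok(4100, 512, 65536));   /* 0: not unit-aligned */
        printf("%d\n", atomic_write_size_ok(131072, 512, 65536)); /* 0: exceeds max */
        return 0;
}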
