summaryrefslogtreecommitdiff
path: root/block/blk-core.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c136
1 files changed, 83 insertions, 53 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index b795ac177281..b862c66018f2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -94,20 +94,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
}
EXPORT_SYMBOL(blk_queue_flag_clear);
-/**
- * blk_queue_flag_test_and_set - atomically test and set a queue flag
- * @flag: flag to be set
- * @q: request queue
- *
- * Returns the previous value of @flag - 0 if the flag was not set and 1 if
- * the flag was already set.
- */
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
-{
- return test_and_set_bit(flag, &q->queue_flags);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
-
#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
static const char *const blk_op_name[] = {
REQ_OP_NAME(READ),
@@ -174,6 +160,8 @@ static const struct {
/* Command duration limit device-side timeout */
[BLK_STS_DURATION_LIMIT] = { -ETIME, "duration limit exceeded" },
+ [BLK_STS_INVAL] = { -EINVAL, "invalid" },
+
/* everything else not covered above: */
[BLK_STS_IOERR] = { -EIO, "I/O" },
};
@@ -231,7 +219,7 @@ EXPORT_SYMBOL_GPL(blk_status_to_str);
*/
void blk_sync_queue(struct request_queue *q)
{
- del_timer_sync(&q->timeout);
+ timer_delete_sync(&q->timeout);
cancel_work_sync(&q->timeout_work);
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -273,6 +261,8 @@ static void blk_free_queue(struct request_queue *q)
blk_mq_release(q);
ida_free(&blk_queue_ida, q->id);
+ lockdep_unregister_key(&q->io_lock_cls_key);
+ lockdep_unregister_key(&q->q_lock_cls_key);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
@@ -290,18 +280,20 @@ void blk_put_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_put_queue);
-void blk_queue_start_drain(struct request_queue *q)
+bool blk_queue_start_drain(struct request_queue *q)
{
/*
* When queue DYING flag is set, we need to block new req
* entering queue, so we call blk_freeze_queue_start() to
* prevent I/O from crossing blk_queue_enter().
*/
- blk_freeze_queue_start(q);
+ bool freeze = __blk_freeze_queue_start(q, current);
if (queue_is_mq(q))
blk_mq_wake_waiters(q);
/* Make blk_queue_enter() reexamine the DYING flag. */
wake_up_all(&q->mq_freeze_wq);
+
+ return freeze;
}
/**
@@ -333,6 +325,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
return -ENODEV;
}
+ rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_);
+ rwsem_release(&q->q_lockdep_map, _RET_IP_);
return 0;
}
@@ -364,6 +358,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
goto dead;
}
+ rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+ rwsem_release(&q->io_lockdep_map, _RET_IP_);
return 0;
dead:
bio_io_error(bio);
@@ -433,8 +429,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
refcount_set(&q->refs, 1);
mutex_init(&q->debugfs_mutex);
+ mutex_init(&q->elevator_lock);
mutex_init(&q->sysfs_lock);
- mutex_init(&q->sysfs_dir_lock);
mutex_init(&q->limits_lock);
mutex_init(&q->rq_qos_mutex);
spin_lock_init(&q->queue_lock);
@@ -453,6 +449,18 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
if (error)
goto fail_stats;
+ lockdep_register_key(&q->io_lock_cls_key);
+ lockdep_register_key(&q->q_lock_cls_key);
+ lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)",
+ &q->io_lock_cls_key, 0);
+ lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)",
+ &q->q_lock_cls_key, 0);
+
+ /* Teach lockdep about lock ordering (reclaim WRT queue freeze lock). */
+ fs_reclaim_acquire(GFP_KERNEL);
+ rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+ rwsem_release(&q->io_lockdep_map, _RET_IP_);
+ fs_reclaim_release(GFP_KERNEL);
q->nr_requests = BLKDEV_DEFAULT_RQ;
@@ -496,7 +504,8 @@ __setup("fail_make_request=", setup_fail_make_request);
bool should_fail_request(struct block_device *part, unsigned int bytes)
{
- return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
+ return bdev_test_flag(part, BD_MAKE_IT_FAIL) &&
+ should_fail(&fail_make_request, bytes);
}
static int __init fail_make_request_debugfs(void)
@@ -516,10 +525,11 @@ static inline void bio_check_ro(struct bio *bio)
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return;
- if (bio->bi_bdev->bd_ro_warned)
+ if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED))
return;
- bio->bi_bdev->bd_ro_warned = true;
+ bdev_set_flag(bio->bi_bdev, BD_RO_WARNED);
+
/*
* Use ioctl to set underlying disk of raid/dm to read-only
* will trigger this.
@@ -591,8 +601,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_NOTSUPP;
/* The bio sector must point to the start of a sequential zone */
- if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector) ||
- !bio_zone_is_seq(bio))
+ if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector))
return BLK_STS_IOERR;
/*
@@ -614,17 +623,30 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
static void __submit_bio(struct bio *bio)
{
+ /* If plug is not used, add new plug here to cache nsecs time. */
+ struct blk_plug plug;
+
if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
- if (!bio->bi_bdev->bd_has_submit_bio) {
+ blk_start_plug(&plug);
+
+ if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
blk_mq_submit_bio(bio);
} else if (likely(bio_queue_enter(bio) == 0)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
-
- disk->fops->submit_bio(bio);
+
+ if ((bio->bi_opf & REQ_POLLED) &&
+ !(disk->queue->limits.features & BLK_FEAT_POLL)) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ } else {
+ disk->fops->submit_bio(bio);
+ }
blk_queue_exit(disk->queue);
}
+
+ blk_finish_plug(&plug);
}
/*
@@ -725,12 +747,24 @@ void submit_bio_noacct_nocheck(struct bio *bio)
*/
if (current->bio_list)
bio_list_add(&current->bio_list[0], bio);
- else if (!bio->bi_bdev->bd_has_submit_bio)
+ else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO))
__submit_bio_noacct_mq(bio);
else
__submit_bio_noacct(bio);
}
+static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
+ struct bio *bio)
+{
+ if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
+ return BLK_STS_INVAL;
+
+ if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
+ return BLK_STS_INVAL;
+
+ return BLK_STS_OK;
+}
+
/**
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
@@ -761,7 +795,8 @@ void submit_bio_noacct(struct bio *bio)
if (!bio_flagged(bio, BIO_REMAPPED)) {
if (unlikely(bio_check_eod(bio)))
goto end_io;
- if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
+ if (bdev_is_partition(bdev) &&
+ unlikely(blk_partition_remap(bio)))
goto end_io;
}
@@ -773,7 +808,7 @@ void submit_bio_noacct(struct bio *bio)
if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE &&
bio_op(bio) != REQ_OP_ZONE_APPEND))
goto end_io;
- if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+ if (!bdev_write_cache(bdev)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!bio_sectors(bio)) {
status = BLK_STS_OK;
@@ -782,12 +817,15 @@ void submit_bio_noacct(struct bio *bio)
}
}
- if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
- bio_clear_polled(bio);
-
switch (bio_op(bio)) {
case REQ_OP_READ:
+ break;
case REQ_OP_WRITE:
+ if (bio->bi_opf & REQ_ATOMIC) {
+ status = blk_validate_atomic_write_op_size(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ }
break;
case REQ_OP_FLUSH:
/*
@@ -816,11 +854,8 @@ void submit_bio_noacct(struct bio *bio)
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
- if (!bdev_is_zoned(bio->bi_bdev))
- goto not_supported;
- break;
case REQ_OP_ZONE_RESET_ALL:
- if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+ if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
case REQ_OP_DRV_IN:
@@ -906,16 +941,9 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
return 0;
q = bdev_get_queue(bdev);
- if (cookie == BLK_QC_T_NONE ||
- !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ if (cookie == BLK_QC_T_NONE)
return 0;
- /*
- * As the requests that require a zone lock are not plugged in the
- * first place, directly accessing the plug instead of using
- * blk_mq_plug() should not have any consequences during flushing for
- * zoned devices.
- */
blk_flush_plug(current->plug, false);
/*
@@ -934,7 +962,8 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
} else {
struct gendisk *disk = q->disk;
- if (disk && disk->fops->poll_bio)
+ if ((q->limits.features & BLK_FEAT_POLL) && disk &&
+ disk->fops->poll_bio)
ret = disk->fops->poll_bio(bio, iob, flags);
}
blk_queue_exit(q);
@@ -987,11 +1016,12 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
unsigned long stamp;
again:
stamp = READ_ONCE(part->bd_stamp);
- if (unlikely(time_after(now, stamp))) {
- if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
- __part_stat_add(part, io_ticks, end ? now - stamp : 1);
- }
- if (part->bd_partno) {
+ if (unlikely(time_after(now, stamp)) &&
+ likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
+ (end || bdev_count_inflight(part)))
+ __part_stat_add(part, io_ticks, now - stamp);
+
+ if (bdev_is_partition(part)) {
part = bdev_whole(part);
goto again;
}
@@ -1097,8 +1127,8 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
return;
plug->cur_ktime = 0;
- plug->mq_list = NULL;
- plug->cached_rq = NULL;
+ rq_list_init(&plug->mq_list);
+ rq_list_init(&plug->cached_rqs);
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
plug->rq_count = 0;
plug->multiple_queues = false;
@@ -1194,7 +1224,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
* queue for cached requests, we don't want a blocked task holding
* up a queue freeze/quiesce event.
*/
- if (unlikely(!rq_list_empty(plug->cached_rq)))
+ if (unlikely(!rq_list_empty(&plug->cached_rqs)))
blk_mq_free_plug_rqs(plug);
plug->cur_ktime = 0;