summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bio.c1
-rw-r--r--block/blk-core.c127
-rw-r--r--block/blk-exec.c10
-rw-r--r--block/blk-mq.c72
-rw-r--r--block/blk-mq.h2
-rw-r--r--block/fops.c25
6 files changed, 137 insertions, 100 deletions
diff --git a/block/bio.c b/block/bio.c
index df45f4b996ac..a3c9ff23a036 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -282,6 +282,7 @@ void bio_init(struct bio *bio, struct bio_vec *table,
atomic_set(&bio->__bi_remaining, 1);
atomic_set(&bio->__bi_cnt, 1);
+ bio->bi_cookie = BLK_QC_T_NONE;
bio->bi_max_vecs = max_vecs;
bio->bi_io_vec = table;
diff --git a/block/blk-core.c b/block/blk-core.c
index 8eb0e08d5395..f008c38ae967 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -915,25 +915,22 @@ end_io:
return false;
}
-static blk_qc_t __submit_bio(struct bio *bio)
+static void __submit_bio(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
- blk_qc_t ret = BLK_QC_T_NONE;
if (unlikely(bio_queue_enter(bio) != 0))
- return BLK_QC_T_NONE;
+ return;
if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio))
goto queue_exit;
- if (disk->fops->submit_bio) {
- ret = disk->fops->submit_bio(bio);
- goto queue_exit;
+ if (!disk->fops->submit_bio) {
+ blk_mq_submit_bio(bio);
+ return;
}
- return blk_mq_submit_bio(bio);
-
+ disk->fops->submit_bio(bio);
queue_exit:
blk_queue_exit(disk->queue);
- return ret;
}
/*
@@ -955,10 +952,9 @@ queue_exit:
* bio_list_on_stack[1] contains bios that were submitted before the current
* ->submit_bio_bio, but that haven't been processed yet.
*/
-static blk_qc_t __submit_bio_noacct(struct bio *bio)
+static void __submit_bio_noacct(struct bio *bio)
{
struct bio_list bio_list_on_stack[2];
- blk_qc_t ret = BLK_QC_T_NONE;
BUG_ON(bio->bi_next);
@@ -975,7 +971,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
- ret = __submit_bio(bio);
+ __submit_bio(bio);
/*
* Sort new bios into those for a lower level and those for the
@@ -998,22 +994,19 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
current->bio_list = NULL;
- return ret;
}
-static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+static void __submit_bio_noacct_mq(struct bio *bio)
{
struct bio_list bio_list[2] = { };
- blk_qc_t ret;
current->bio_list = bio_list;
do {
- ret = __submit_bio(bio);
+ __submit_bio(bio);
} while ((bio = bio_list_pop(&bio_list[0])));
current->bio_list = NULL;
- return ret;
}
/**
@@ -1025,7 +1018,7 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
* systems and other upper level users of the block layer should use
* submit_bio() instead.
*/
-blk_qc_t submit_bio_noacct(struct bio *bio)
+void submit_bio_noacct(struct bio *bio)
{
/*
* We only want one ->submit_bio to be active at a time, else stack
@@ -1033,14 +1026,12 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
* to collect a list of requests submited by a ->submit_bio method while
* it is active, and then process them after it returned.
*/
- if (current->bio_list) {
+ if (current->bio_list)
bio_list_add(&current->bio_list[0], bio);
- return BLK_QC_T_NONE;
- }
-
- if (!bio->bi_bdev->bd_disk->fops->submit_bio)
- return __submit_bio_noacct_mq(bio);
- return __submit_bio_noacct(bio);
+ else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
+ __submit_bio_noacct_mq(bio);
+ else
+ __submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio_noacct);
@@ -1057,10 +1048,10 @@ EXPORT_SYMBOL(submit_bio_noacct);
* in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has
* been called.
*/
-blk_qc_t submit_bio(struct bio *bio)
+void submit_bio(struct bio *bio)
{
if (blkcg_punt_bio_submit(bio))
- return BLK_QC_T_NONE;
+ return;
/*
* If it's a regular read/write or a barrier with data attached,
@@ -1092,20 +1083,92 @@ blk_qc_t submit_bio(struct bio *bio)
if (unlikely(bio_op(bio) == REQ_OP_READ &&
bio_flagged(bio, BIO_WORKINGSET))) {
unsigned long pflags;
- blk_qc_t ret;
psi_memstall_enter(&pflags);
- ret = submit_bio_noacct(bio);
+ submit_bio_noacct(bio);
psi_memstall_leave(&pflags);
-
- return ret;
+ return;
}
- return submit_bio_noacct(bio);
+ submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio);
/**
+ * bio_poll - poll for BIO completions
+ * @bio: bio to poll for
+ * @flags: BLK_POLL_* flags that control the behavior
+ *
+ * Poll for completions on queue associated with the bio. Returns number of
+ * completed entries found.
+ *
+ * Note: the caller must either be the context that submitted @bio, or
+ * be in a RCU critical section to prevent freeing of @bio.
+ */
+int bio_poll(struct bio *bio, unsigned int flags)
+{
+ struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+ blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
+ int ret;
+
+ if (cookie == BLK_QC_T_NONE ||
+ !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ return 0;
+
+ if (current->plug)
+ blk_flush_plug_list(current->plug, false);
+
+ if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
+ return 0;
+ if (WARN_ON_ONCE(!queue_is_mq(q)))
+ ret = 0; /* not yet implemented, should not happen */
+ else
+ ret = blk_mq_poll(q, cookie, flags);
+ blk_queue_exit(q);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bio_poll);
+
+/*
+ * Helper to implement file_operations.iopoll. Requires the bio to be stored
+ * in iocb->private, and cleared before freeing the bio.
+ */
+int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags)
+{
+ struct bio *bio;
+ int ret = 0;
+
+ /*
+ * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can
+ * point to a freshly allocated bio at this point. If that happens
+ * we have a few cases to consider:
+ *
+ * 1) the bio is beeing initialized and bi_bdev is NULL. We can just
+ * simply nothing in this case
+ * 2) the bio points to a not poll enabled device. bio_poll will catch
+ * this and return 0
+ * 3) the bio points to a poll capable device, including but not
+ * limited to the one that the original bio pointed to. In this
+ * case we will call into the actual poll method and poll for I/O,
+ * even if we don't need to, but it won't cause harm either.
+ *
+ * For cases 2) and 3) above the RCU grace period ensures that bi_bdev
+ * is still allocated. Because partitions hold a reference to the whole
+ * device bdev and thus disk, the disk is also still valid. Grabbing
+ * a reference to the queue in bio_poll() ensures the hctxs and requests
+ * are still valid as well.
+ */
+ rcu_read_lock();
+ bio = READ_ONCE(kiocb->private);
+ if (bio && bio->bi_bdev)
+ ret = bio_poll(bio, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iocb_bio_iopoll);
+
+/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
* for the new queue limits
* @q: the queue
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 1fa7f25e5726..55f0cd34b37b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,13 +65,19 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
static bool blk_rq_is_poll(struct request *rq)
{
- return rq->mq_hctx && rq->mq_hctx->type == HCTX_TYPE_POLL;
+ if (!rq->mq_hctx)
+ return false;
+ if (rq->mq_hctx->type != HCTX_TYPE_POLL)
+ return false;
+ if (WARN_ON_ONCE(!rq->bio))
+ return false;
+ return true;
}
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
do {
- blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0);
+ bio_poll(rq->bio, 0);
cond_resched();
} while (!completion_done(wait));
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a34ffcf861c3..0860f622099f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -65,6 +65,9 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
return bucket;
}
+#define BLK_QC_T_SHIFT 16
+#define BLK_QC_T_INTERNAL (1U << 31)
+
static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
blk_qc_t qc)
{
@@ -81,6 +84,13 @@ static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
return blk_mq_tag_to_rq(hctx->tags, tag);
}
+static inline blk_qc_t blk_rq_to_qc(struct request *rq)
+{
+ return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
+ (rq->tag != -1 ?
+ rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
+}
+
/*
* Check if any of the ctx, dispatch list or elevator
* have pending work in this hardware queue.
@@ -819,6 +829,8 @@ void blk_mq_start_request(struct request *rq)
if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
q->integrity.profile->prepare_fn(rq);
#endif
+ if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
+ WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
}
EXPORT_SYMBOL(blk_mq_start_request);
@@ -2045,19 +2057,15 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
}
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
- struct request *rq,
- blk_qc_t *cookie, bool last)
+ struct request *rq, bool last)
{
struct request_queue *q = rq->q;
struct blk_mq_queue_data bd = {
.rq = rq,
.last = last,
};
- blk_qc_t new_cookie;
blk_status_t ret;
- new_cookie = request_to_qc_t(hctx, rq);
-
/*
* For OK queue, we are done. For error, caller may kill it.
* Any other error (busy), just add it to our list as we
@@ -2067,7 +2075,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
switch (ret) {
case BLK_STS_OK:
blk_mq_update_dispatch_busy(hctx, false);
- *cookie = new_cookie;
break;
case BLK_STS_RESOURCE:
case BLK_STS_DEV_RESOURCE:
@@ -2076,7 +2083,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
break;
default:
blk_mq_update_dispatch_busy(hctx, false);
- *cookie = BLK_QC_T_NONE;
break;
}
@@ -2085,7 +2091,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
- blk_qc_t *cookie,
bool bypass_insert, bool last)
{
struct request_queue *q = rq->q;
@@ -2119,7 +2124,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
goto insert;
}
- return __blk_mq_issue_directly(hctx, rq, cookie, last);
+ return __blk_mq_issue_directly(hctx, rq, last);
insert:
if (bypass_insert)
return BLK_STS_RESOURCE;
@@ -2133,7 +2138,6 @@ insert:
* blk_mq_try_issue_directly - Try to send a request directly to device driver.
* @hctx: Pointer of the associated hardware queue.
* @rq: Pointer to request to be sent.
- * @cookie: Request queue cookie.
*
* If the device has enough resources to accept a new request now, send the
* request directly to device driver. Else, insert at hctx->dispatch queue, so
@@ -2141,7 +2145,7 @@ insert:
* queue have higher priority.
*/
static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
- struct request *rq, blk_qc_t *cookie)
+ struct request *rq)
{
blk_status_t ret;
int srcu_idx;
@@ -2150,7 +2154,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
hctx_lock(hctx, &srcu_idx);
- ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
+ ret = __blk_mq_try_issue_directly(hctx, rq, false, true);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
blk_mq_request_bypass_insert(rq, false, true);
else if (ret != BLK_STS_OK)
@@ -2163,11 +2167,10 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
{
blk_status_t ret;
int srcu_idx;
- blk_qc_t unused_cookie;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
hctx_lock(hctx, &srcu_idx);
- ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
+ ret = __blk_mq_try_issue_directly(hctx, rq, true, last);
hctx_unlock(hctx, srcu_idx);
return ret;
@@ -2247,10 +2250,8 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
*
* It will not queue the request if there is an error with the bio, or at the
* request creation.
- *
- * Returns: Request queue cookie.
*/
-blk_qc_t blk_mq_submit_bio(struct bio *bio)
+void blk_mq_submit_bio(struct bio *bio)
{
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
const int is_sync = op_is_sync(bio->bi_opf);
@@ -2259,9 +2260,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
unsigned int nr_segs;
- blk_qc_t cookie;
blk_status_t ret;
- bool hipri;
blk_queue_bounce(q, &bio);
__blk_queue_split(&bio, &nr_segs);
@@ -2278,8 +2277,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
rq_qos_throttle(q, bio);
- hipri = bio->bi_opf & REQ_POLLED;
-
plug = blk_mq_plug(q, bio);
if (plug && plug->cached_rq) {
rq = plug->cached_rq;
@@ -2310,8 +2307,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
rq_qos_track(q, rq, bio);
- cookie = request_to_qc_t(rq->mq_hctx, rq);
-
blk_mq_bio_to_request(rq, bio, nr_segs);
ret = blk_crypto_init_request(rq);
@@ -2319,7 +2314,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
bio->bi_status = ret;
bio_endio(bio);
blk_mq_free_request(rq);
- return BLK_QC_T_NONE;
+ return;
}
if (unlikely(is_flush_fua)) {
@@ -2375,7 +2370,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
if (same_queue_rq) {
trace_block_unplug(q, 1, true);
blk_mq_try_issue_directly(same_queue_rq->mq_hctx,
- same_queue_rq, &cookie);
+ same_queue_rq);
}
} else if ((q->nr_hw_queues > 1 && is_sync) ||
!rq->mq_hctx->dispatch_busy) {
@@ -2383,18 +2378,15 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
* There is no scheduler and we can try to send directly
* to the hardware.
*/
- blk_mq_try_issue_directly(rq->mq_hctx, rq, &cookie);
+ blk_mq_try_issue_directly(rq->mq_hctx, rq);
} else {
/* Default case. */
blk_mq_sched_insert_request(rq, false, true, true);
}
- if (!hipri)
- return BLK_QC_T_NONE;
- return cookie;
+ return;
queue_exit:
blk_queue_exit(q);
- return BLK_QC_T_NONE;
}
static size_t order_to_size(unsigned int order)
@@ -4084,25 +4076,8 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
return 0;
}
-/**
- * blk_poll - poll for IO completions
- * @q: the queue
- * @cookie: cookie passed back at IO submission time
- * @flags: BLK_POLL_* flags that control the behavior
- *
- * Description:
- * Poll for completions on the passed in queue. Returns number of
- * completed entries found.
- */
-int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
{
- if (cookie == BLK_QC_T_NONE ||
- !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
- return 0;
-
- if (current->plug)
- blk_flush_plug_list(current->plug, false);
-
if (!(flags & BLK_POLL_NOSLEEP) &&
q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
if (blk_mq_poll_hybrid(q, cookie))
@@ -4110,7 +4085,6 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
}
return blk_mq_poll_classic(q, cookie, flags);
}
-EXPORT_SYMBOL_GPL(blk_poll);
unsigned int blk_mq_rq_cpu(struct request *rq)
{
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a9fe01e14951..8be447995106 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -37,6 +37,8 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;
+void blk_mq_submit_bio(struct bio *bio);
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
void blk_mq_exit_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
diff --git a/block/fops.c b/block/fops.c
index db8f2fe68dd2..ce1255529ba2 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -61,7 +61,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bool should_dirty = false;
struct bio bio;
ssize_t ret;
- blk_qc_t qc;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
@@ -102,13 +101,12 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_HIPRI)
bio_set_polled(&bio, iocb);
- qc = submit_bio(&bio);
+ submit_bio(&bio);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(bio.bi_private))
break;
- if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc, 0))
+ if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0))
blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -141,14 +139,6 @@ struct blkdev_dio {
static struct bio_set blkdev_dio_pool;
-static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags)
-{
- struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
- struct request_queue *q = bdev_get_queue(bdev);
-
- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags);
-}
-
static void blkdev_bio_end_io(struct bio *bio)
{
struct blkdev_dio *dio = bio->bi_private;
@@ -162,6 +152,8 @@ static void blkdev_bio_end_io(struct bio *bio)
struct kiocb *iocb = dio->iocb;
ssize_t ret;
+ WRITE_ONCE(iocb->private, NULL);
+
if (likely(!dio->bio.bi_status)) {
ret = dio->size;
iocb->ki_pos += ret;
@@ -200,7 +192,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
- blk_qc_t qc = BLK_QC_T_NONE;
int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
@@ -262,9 +253,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (!nr_pages) {
if (do_poll)
bio_set_polled(bio, iocb);
- qc = submit_bio(bio);
+ submit_bio(bio);
if (do_poll)
- WRITE_ONCE(iocb->ki_cookie, qc);
+ WRITE_ONCE(iocb->private, bio);
break;
}
if (!dio->multi_bio) {
@@ -297,7 +288,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (!READ_ONCE(dio->waiter))
break;
- if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0))
+ if (!do_poll || !bio_poll(bio, 0))
blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -594,7 +585,7 @@ const struct file_operations def_blk_fops = {
.llseek = blkdev_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
- .iopoll = blkdev_iopoll,
+ .iopoll = iocb_bio_iopoll,
.mmap = generic_file_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = blkdev_ioctl,