summaryrefslogtreecommitdiff
path: root/block/blk-core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 10:23:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 10:23:25 -0700
commit73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
treec2fda8ca1273744d2e884d24189a15ac1a7d63c2 /block/blk-core.c
parent958f338e96f874a0d29442396d6adf9c1e17aa2d (diff)
parentb86d865cb1cae1e61527ea0b8977078bbf694328 (diff)
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "First pull request for this merge window, there will also be a followup request with some stragglers. This pull request contains: - Fix for a thundering heard issue in the wbt block code (Anchal Agarwal) - A few NVMe pull requests: * Improved tracepoints (Keith) * Larger inline data support for RDMA (Steve Wise) * RDMA setup/teardown fixes (Sagi) * Effects log suppor for NVMe target (Chaitanya Kulkarni) * Buffered IO suppor for NVMe target (Chaitanya Kulkarni) * TP4004 (ANA) support (Christoph) * Various NVMe fixes - Block io-latency controller support. Much needed support for properly containing block devices. (Josef) - Series improving how we handle sense information on the stack (Kees) - Lightnvm fixes and updates/improvements (Mathias/Javier et al) - Zoned device support for null_blk (Matias) - AIX partition fixes (Mauricio Faria de Oliveira) - DIF checksum code made generic (Max Gurtovoy) - Add support for discard in iostats (Michael Callahan / Tejun) - Set of updates for BFQ (Paolo) - Removal of async write support for bsg (Christoph) - Bio page dirtying and clone fixups (Christoph) - Set of bcache fix/changes (via Coly) - Series improving blk-mq queue setup/teardown speed (Ming) - Series improving merging performance on blk-mq (Ming) - Lots of other fixes and cleanups from a slew of folks" * tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits) blkcg: Make blkg_root_lookup() work for queues in bypass mode bcache: fix error setting writeback_rate through sysfs interface null_blk: add lock drop/acquire annotation Blk-throttle: reduce tail io latency when iops limit is enforced block: paride: pd: mark expected switch fall-throughs block: Ensure that a request queue is dissociated from the cgroup controller block: Introduce blk_exit_queue() blkcg: Introduce blkg_root_lookup() block: Remove two superfluous #include directives blk-mq: count the hctx as active before allocating tag block: bvec_nr_vecs() returns value for wrong slab bcache: trivial - remove tailing backslash in macro BTREE_FLAG bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section bcache: set max writeback rate when I/O request is idle bcache: add code comments for bset.c bcache: fix mistaken comments in request.c bcache: fix mistaken code comments in bcache.h bcache: add a comment in super.c bcache: avoid unncessary cache prefetch bch_btree_node_get() bcache: display rate debug parameters to 0 when writeback is not running ...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c106
1 files changed, 61 insertions, 45 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index ee33590f54eb..12550340418d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -42,7 +42,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
-#include "blk-wbt.h"
+#include "blk-rq-qos.h"
#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
@@ -715,6 +715,35 @@ void blk_set_queue_dying(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
+void blk_exit_queue(struct request_queue *q)
+{
+ /*
+ * Since the I/O scheduler exit code may access cgroup information,
+ * perform I/O scheduler exit before disassociating from the block
+ * cgroup controller.
+ */
+ if (q->elevator) {
+ ioc_clear_queue(q);
+ elevator_exit(q, q->elevator);
+ q->elevator = NULL;
+ }
+
+ /*
+ * Remove all references to @q from the block cgroup controller before
+ * restoring @q->queue_lock to avoid that restoring this pointer causes
+ * e.g. blkcg_print_blkgs() to crash.
+ */
+ blkcg_exit_queue(q);
+
+ /*
+ * Since the cgroup code may dereference the @q->backing_dev_info
+ * pointer, only decrease its reference count after having removed the
+ * association with the block cgroup controller.
+ */
+ bdi_put(q->backing_dev_info);
+}
+
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
@@ -762,9 +791,13 @@ void blk_cleanup_queue(struct request_queue *q)
* make sure all in-progress dispatch are completed because
* blk_freeze_queue() can only complete all requests, and
* dispatch may still be in-progress since we dispatch requests
- * from more than one contexts
+ * from more than one contexts.
+ *
+ * No need to quiesce queue if it isn't initialized yet since
+ * blk_freeze_queue() should be enough for cases of passthrough
+ * request.
*/
- if (q->mq_ops)
+ if (q->mq_ops && blk_queue_init_done(q))
blk_mq_quiesce_queue(q);
/* for synchronous bio-based driver finish in-flight integrity i/o */
@@ -780,30 +813,7 @@ void blk_cleanup_queue(struct request_queue *q)
*/
WARN_ON_ONCE(q->kobj.state_in_sysfs);
- /*
- * Since the I/O scheduler exit code may access cgroup information,
- * perform I/O scheduler exit before disassociating from the block
- * cgroup controller.
- */
- if (q->elevator) {
- ioc_clear_queue(q);
- elevator_exit(q, q->elevator);
- q->elevator = NULL;
- }
-
- /*
- * Remove all references to @q from the block cgroup controller before
- * restoring @q->queue_lock to avoid that restoring this pointer causes
- * e.g. blkcg_print_blkgs() to crash.
- */
- blkcg_exit_queue(q);
-
- /*
- * Since the cgroup code may dereference the @q->backing_dev_info
- * pointer, only decrease its reference count after having removed the
- * association with the block cgroup controller.
- */
- bdi_put(q->backing_dev_info);
+ blk_exit_queue(q);
if (q->mq_ops)
blk_mq_free_queue(q);
@@ -1180,6 +1190,7 @@ out_exit_flush_rq:
q->exit_rq_fn(q, q->fq->flush_rq);
out_free_flush_queue:
blk_free_flush_queue(q->fq);
+ q->fq = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1641,7 +1652,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
- wbt_requeue(q->rq_wb, rq);
+ rq_qos_requeue(q, rq);
if (rq->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1748,7 +1759,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
- wbt_done(q->rq_wb, req);
+ rq_qos_done(q, req);
/*
* Request may not have originated from ll_rw_blk. if not,
@@ -1982,7 +1993,6 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
int where = ELEVATOR_INSERT_SORT;
struct request *req, *free;
unsigned int request_count = 0;
- unsigned int wb_acct;
/*
* low level driver can indicate that it wants pages above a
@@ -2040,7 +2050,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}
get_rq:
- wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+ rq_qos_throttle(q, bio, q->queue_lock);
/*
* Grab a free request. This is might sleep but can not fail.
@@ -2050,7 +2060,7 @@ get_rq:
req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
if (IS_ERR(req)) {
blk_queue_exit(q);
- __wbt_done(q->rq_wb, wb_acct);
+ rq_qos_cleanup(q, bio);
if (PTR_ERR(req) == -ENOMEM)
bio->bi_status = BLK_STS_RESOURCE;
else
@@ -2059,7 +2069,7 @@ get_rq:
goto out_unlock;
}
- wbt_track(req, wb_acct);
+ rq_qos_track(q, req, bio);
/*
* After dropping the lock and possibly sleeping here, our request
@@ -2700,13 +2710,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (blk_do_io_stat(req)) {
- const int rw = rq_data_dir(req);
+ const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
int cpu;
cpu = part_stat_lock();
part = req->part;
- part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+ part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
part_stat_unlock();
}
}
@@ -2720,7 +2730,7 @@ void blk_account_io_done(struct request *req, u64 now)
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
unsigned long duration;
- const int rw = rq_data_dir(req);
+ const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
int cpu;
@@ -2728,10 +2738,10 @@ void blk_account_io_done(struct request *req, u64 now)
cpu = part_stat_lock();
part = req->part;
- part_stat_inc(cpu, part, ios[rw]);
- part_stat_add(cpu, part, ticks[rw], duration);
+ part_stat_inc(cpu, part, ios[sgrp]);
+ part_stat_add(cpu, part, ticks[sgrp], duration);
part_round_stats(req->q, cpu, part);
- part_dec_in_flight(req->q, part, rw);
+ part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
part_stat_unlock();
@@ -2751,9 +2761,9 @@ static bool blk_pm_allow_request(struct request *rq)
return rq->rq_flags & RQF_PM;
case RPM_SUSPENDED:
return false;
+ default:
+ return true;
}
-
- return true;
}
#else
static bool blk_pm_allow_request(struct request *rq)
@@ -2980,7 +2990,7 @@ void blk_start_request(struct request *req)
req->throtl_size = blk_rq_sectors(req);
#endif
req->rq_flags |= RQF_STATS;
- wbt_issue(req->q->rq_wb, req);
+ rq_qos_issue(req->q, req);
}
BUG_ON(blk_rq_is_complete(req));
@@ -3053,6 +3063,10 @@ EXPORT_SYMBOL_GPL(blk_steal_bios);
* Passing the result of blk_rq_bytes() as @nr_bytes guarantees
* %false return from this function.
*
+ * Note:
+ * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
+ * blk_rq_bytes() and in blk_update_request().
+ *
* Return:
* %false - this request doesn't have any more data
* %true - this request has more data
@@ -3200,7 +3214,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
blk_account_io_done(req, now);
if (req->end_io) {
- wbt_done(req->q->rq_wb, req);
+ rq_qos_done(q, req);
req->end_io(req, error);
} else {
if (blk_bidi_rq(req))
@@ -3763,9 +3777,11 @@ EXPORT_SYMBOL(blk_finish_plug);
*/
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
- /* not support for RQF_PM and ->rpm_status in blk-mq yet */
- if (q->mq_ops)
+ /* Don't enable runtime PM for blk-mq until it is ready */
+ if (q->mq_ops) {
+ pm_runtime_disable(dev);
return;
+ }
q->dev = dev;
q->rpm_status = RPM_ACTIVE;