From 614822f81f606e0064acdae11d9ec1efd3db4190 Mon Sep 17 00:00:00 2001
From: Luca Miccio
Date: Mon, 13 Nov 2017 07:34:08 +0100
Subject: block, bfq: add missing invocations of bfqg_stats_update_io_add/remove

bfqg_stats_update_io_add and bfqg_stats_update_io_remove are to be
invoked, respectively, when an I/O request enters and when an I/O
request exits the scheduler. Unfortunately, bfq does not fully comply
with this scheme, because it does not invoke these functions for
requests that are inserted into or extracted from its priority
dispatch list. This commit fixes this mistake.

Tested-by: Lee Tibbert
Tested-by: Oleksandr Natalenko
Signed-off-by: Paolo Valente
Signed-off-by: Luca Miccio
Signed-off-by: Jens Axboe
---
 block/bfq-iosched.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 889a8549d97f..91703eba63f0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1359,7 +1359,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 		bfqq->ttime.last_end_request +
 		bfqd->bfq_slice_idle * 3;
 
-	bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags);
 
 	/*
 	 * bfqq deserves to be weight-raised if:
@@ -1633,7 +1632,6 @@ static void bfq_remove_request(struct request_queue *q,
 
 	if (rq->cmd_flags & REQ_META)
 		bfqq->meta_pending--;
-	bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
 }
 
 static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
@@ -1746,6 +1744,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 		bfqq->next_rq = rq;
 
 	bfq_remove_request(q, next);
+	bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
 
 	spin_unlock_irq(&bfqq->bfqd->lock);
 end:
@@ -3700,6 +3699,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	spin_lock_irq(&bfqd->lock);
 
 	rq = __bfq_dispatch_request(hctx);
+	if (rq && RQ_BFQQ(rq))
+		bfqg_stats_update_io_remove(bfqq_group(RQ_BFQQ(rq)),
+					    rq->cmd_flags);
 
 	spin_unlock_irq(&bfqd->lock);
 	return rq;
@@ -4224,6 +4226,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
 	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 
 	spin_lock_irq(&bfqd->lock);
 	if (blk_mq_sched_try_insert_merge(q, rq)) {
@@ -4243,6 +4246,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
 		__bfq_insert_request(bfqd, rq);
+		/*
+		 * Update bfqq, because, if a queue merge has occurred
+		 * in __bfq_insert_request, then rq has been
+		 * redirected into a new queue.
+		 */
+		bfqq = RQ_BFQQ(rq);
 
 		if (rq_mergeable(rq)) {
 			elv_rqhash_add(q, rq);
@@ -4251,6 +4260,9 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		}
 	}
 
+	if (bfqq)
+		bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, rq->cmd_flags);
+
 	spin_unlock_irq(&bfqd->lock);
 }
 
@@ -4428,8 +4440,11 @@ static void bfq_finish_request(struct request *rq)
 		 * lock is held.
		 */
 
-		if (!RB_EMPTY_NODE(&rq->rb_node))
+		if (!RB_EMPTY_NODE(&rq->rb_node)) {
 			bfq_remove_request(rq->q, rq);
+			bfqg_stats_update_io_remove(bfqq_group(bfqq),
+						    rq->cmd_flags);
+		}
 		bfq_put_rq_priv_body(bfqq);
 	}
--
cgit
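The invariant restored by the patch above is worth making explicit: every
path on which a request enters the scheduler must be paired with a
bfqg_stats_update_io_add() call, and every path on which one exits,
including the priority dispatch list, with a bfqg_stats_update_io_remove()
call. The following standalone C sketch models that pairing; it is
illustrative only (simplified types and invented names, not the kernel API).

#include <assert.h>
#include <stdio.h>

struct group_stats { int queued; };	/* stands in for bfqg->stats.queued */

static void stats_io_add(struct group_stats *st)    { st->queued++; }
static void stats_io_remove(struct group_stats *st) { st->queued--; }

static void insert_request(struct group_stats *st)   { stats_io_add(st); }
static void dispatch_request(struct group_stats *st) { stats_io_remove(st); }

/* The exit path that was previously left unaccounted. */
static void dispatch_from_priority_list(struct group_stats *st)
{
	stats_io_remove(st);
}

int main(void)
{
	struct group_stats st = { 0 };

	insert_request(&st);
	dispatch_request(&st);

	insert_request(&st);
	dispatch_from_priority_list(&st);

	assert(st.queued == 0);	/* add and remove invocations balance */
	printf("queued after all requests exited: %d\n", st.queued);
	return 0;
}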
From 24bfd19bb7890255693ee5cb6dc100d8d215d00b Mon Sep 17 00:00:00 2001
From: Paolo Valente
Date: Mon, 13 Nov 2017 07:34:09 +0100
Subject: block, bfq: update blkio stats outside the scheduler lock

bfq invokes various blkg_*stats_* functions to update the statistics
contained in the special files blkio.bfq.* in the blkio controller
groups, i.e., the I/O accounting related to the proportional-share
policy provided by bfq. The execution of these functions takes a
considerable percentage, about 40%, of the total per-request execution
time of bfq (i.e., of the sum of the execution time of all the bfq
functions that have to be executed to process an I/O request from its
creation to its destruction). This reduces the request-processing rate
sustainable by bfq noticeably, even on a multicore CPU. In fact, the
bfq functions that invoke blkg_*stats_* functions cannot be executed in
parallel with the rest of the code of bfq, because both are executed
under the same per-device scheduler lock.

To reduce this slowdown, this commit moves, wherever possible, the
invocation of these functions (more precisely, of the bfq functions
that invoke blkg_*stats_* functions) outside the critical sections
protected by the scheduler lock.

With this change, and with all blkio.bfq.* statistics enabled, the
throughput grows, e.g., from 250 to 310 KIOPS (+25%) on an Intel
i7-4850HQ, in case of 8 threads doing random I/O in parallel on
null_blk, with the latter configured with 0 latency. We obtained the
same or higher throughput boosts, up to +30%, with other processors
(some figures are reported in the documentation). For our tests, we
used the script [1], with which our results can be easily reproduced.

NOTE. This commit still protects the invocation of blkg_*stats_*
functions with the request_queue lock, because the group these
functions are invoked on may otherwise disappear before or while these
functions are executed. Fortunately, tests without even this lock
show, by difference, that the serialization caused by this lock has
little impact (at most a ~5% throughput reduction).

[1] https://github.com/Algodev-github/IOSpeed

Tested-by: Lee Tibbert
Tested-by: Oleksandr Natalenko
Signed-off-by: Paolo Valente
Signed-off-by: Luca Miccio
Signed-off-by: Jens Axboe
---
 block/bfq-iosched.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++------
 block/bfq-wf2q.c    |   1 -
 2 files changed, 99 insertions(+), 12 deletions(-)

(limited to 'block')

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 91703eba63f0..69e05f861daf 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2228,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
 				       struct bfq_queue *bfqq)
 {
 	if (bfqq) {
-		bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
 		bfq_clear_bfqq_fifo_expire(bfqq);
 
 		bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
@@ -3469,7 +3468,6 @@ check_queue:
 			 */
 			bfq_clear_bfqq_wait_request(bfqq);
 			hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
-			bfqg_stats_update_idle_time(bfqq_group(bfqq));
 		}
 		goto keep_queue;
 	}
@@ -3695,15 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
 	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
 	struct request *rq;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	struct bfq_queue *in_serv_queue, *bfqq;
+	bool waiting_rq, idle_timer_disabled;
+#endif
 
 	spin_lock_irq(&bfqd->lock);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	in_serv_queue = bfqd->in_service_queue;
+	waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
+
+	rq = __bfq_dispatch_request(hctx);
+
+	idle_timer_disabled =
+		waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+
+#else
 	rq = __bfq_dispatch_request(hctx);
-	if (rq && RQ_BFQQ(rq))
-		bfqg_stats_update_io_remove(bfqq_group(RQ_BFQQ(rq)),
-					    rq->cmd_flags);
-
+#endif
 	spin_unlock_irq(&bfqd->lock);
 
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	bfqq = rq ? RQ_BFQQ(rq) : NULL;
+	if (!idle_timer_disabled && !bfqq)
+		return rq;
+
+	/*
+	 * rq and bfqq are guaranteed to exist until this function
+	 * ends, for the following reasons. First, rq can be
+	 * dispatched to the device, and then can be completed and
+	 * freed, only after this function ends. Second, rq cannot be
+	 * merged (and thus freed because of a merge) any longer,
+	 * because it has already started. Thus rq cannot be freed
+	 * before this function ends, and, since rq has a reference to
+	 * bfqq, the same guarantee holds for bfqq too.
+	 *
+	 * In addition, the following queue lock guarantees that
+	 * bfqq_group(bfqq) exists as well.
+	 */
+	spin_lock_irq(hctx->queue->queue_lock);
+	if (idle_timer_disabled)
+		/*
+		 * Since the idle timer has been disabled,
+		 * in_serv_queue contained some request when
+		 * __bfq_dispatch_request was invoked above, which
+		 * implies that rq was picked exactly from
+		 * in_serv_queue. Thus in_serv_queue == bfqq, and is
+		 * therefore guaranteed to exist because of the above
+		 * arguments.
		 */
+		bfqg_stats_update_idle_time(bfqq_group(in_serv_queue));
+	if (bfqq) {
+		struct bfq_group *bfqg = bfqq_group(bfqq);
+
+		bfqg_stats_update_avg_queue_size(bfqg);
+		bfqg_stats_set_start_empty_time(bfqg);
+		bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
+	}
+	spin_unlock_irq(hctx->queue->queue_lock);
+#endif
+
 	return rq;
 }
@@ -4161,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			 */
 			bfq_clear_bfqq_wait_request(bfqq);
 			hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
-			bfqg_stats_update_idle_time(bfqq_group(bfqq));
 
 			/*
 			 * The queue is not empty, because a new request just
@@ -4176,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	}
 }
 
-static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
+/* returns true if it causes the idle timer to be disabled */
+static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 {
 	struct bfq_queue *bfqq = RQ_BFQQ(rq),
 		*new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+	bool waiting, idle_timer_disabled = false;
 
 	if (new_bfqq) {
 		if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
@@ -4213,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 		bfqq = new_bfqq;
 	}
 
+	waiting = bfqq && bfq_bfqq_wait_request(bfqq);
 	bfq_add_request(rq);
+	idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
 
 	rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
 	list_add_tail(&rq->queuelist, &bfqq->fifo);
 
 	bfq_rq_enqueued(bfqd, bfqq, rq);
+
+	return idle_timer_disabled;
 }
 
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -4226,7 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
 	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	bool idle_timer_disabled = false;
+	unsigned int cmd_flags;
+#endif
 
 	spin_lock_irq(&bfqd->lock);
 	if (blk_mq_sched_try_insert_merge(q, rq)) {
@@ -4245,13 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
-		__bfq_insert_request(bfqd, rq);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
 		 * in __bfq_insert_request, then rq has been
 		 * redirected into a new queue.
 		 */
 		bfqq = RQ_BFQQ(rq);
+#else
+		__bfq_insert_request(bfqd, rq);
+#endif
 
 		if (rq_mergeable(rq)) {
 			elv_rqhash_add(q, rq);
@@ -4260,10 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		}
 	}
 
-	if (bfqq)
-		bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, rq->cmd_flags);
-
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	/*
+	 * Cache cmd_flags before releasing scheduler lock, because rq
+	 * may disappear afterwards (for example, because of a request
+	 * merge).
+	 */
+	cmd_flags = rq->cmd_flags;
+#endif
 	spin_unlock_irq(&bfqd->lock);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	if (!bfqq)
+		return;
+	/*
+	 * bfqq still exists, because it can disappear only after
+	 * either it is merged with another queue, or the process it
+	 * is associated with exits. But both actions must be taken by
+	 * the same process currently executing this flow of
+	 * instructions.
+	 *
+	 * In addition, the following queue lock guarantees that
+	 * bfqq_group(bfqq) exists as well.
	 */
+	spin_lock_irq(q->queue_lock);
+	bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
+	if (idle_timer_disabled)
+		bfqg_stats_update_idle_time(bfqq_group(bfqq));
+	spin_unlock_irq(q->queue_lock);
+#endif
 }
 
 static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 414ba686a847..e495d3f9b4b0 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -843,7 +843,6 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
 		st->vtime += bfq_delta(served, st->wsum);
 		bfq_forget_idle(st);
 	}
-	bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
 	bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
 }
--
cgit
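The core pattern of the commit above can be shown compactly: while holding
the hot scheduler lock, compute only the booleans that say what must be
accounted, then release that lock and do the actual accounting under the
queue lock. Here is a standalone userspace sketch of that pattern; pthread
mutexes stand in for the two kernel locks, and all names are illustrative
inventions, not the bfq API.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER; /* bfqd->lock */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER; /* q->queue_lock */

static bool in_service_waiting = true;	/* models bfq_bfqq_wait_request() */
static long idle_time_events;		/* models the idle_time statistic */

static int dispatch_one(void)		/* models __bfq_dispatch_request() */
{
	in_service_waiting = false;	/* dispatching disables idling */
	return 1;			/* "a request was picked" */
}

static int dispatch_request(void)
{
	bool was_waiting, idle_timer_disabled;
	int rq;

	pthread_mutex_lock(&sched_lock);
	was_waiting = in_service_waiting;
	rq = dispatch_one();
	/* Record what happened; do not account it under the hot lock. */
	idle_timer_disabled = was_waiting && !in_service_waiting;
	pthread_mutex_unlock(&sched_lock);

	/* The accounting itself runs outside the scheduler lock. */
	if (idle_timer_disabled) {
		pthread_mutex_lock(&queue_lock);
		idle_time_events++;
		pthread_mutex_unlock(&queue_lock);
	}
	return rq;
}

int main(void)
{
	dispatch_request();
	printf("idle-time events accounted: %ld\n", idle_time_events);
	return 0;
}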
From a33801e8b4735b8d473f963e5854172f9cde3e8b Mon Sep 17 00:00:00 2001
From: Luca Miccio
Date: Mon, 13 Nov 2017 07:34:10 +0100
Subject: block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP

BFQ currently creates, and updates, its own instance of the whole set
of blkio statistics that cfq creates. Yet, from the comments of Tejun
Heo in [1], it turned out that most of these statistics are
meant/useful only for debugging. This commit makes BFQ create the
latter, debugging statistics only if the option
CONFIG_DEBUG_BLK_CGROUP is set.

By doing so, this commit also enables BFQ to enjoy a high performance
boost. The reason is that, if CONFIG_DEBUG_BLK_CGROUP is not set, then
BFQ has to update far fewer statistics, and, in particular, not the
heaviest to update. To give an idea of the benefits, if
CONFIG_DEBUG_BLK_CGROUP is not set, then, on an Intel i7-4850HQ, and
with 8 threads doing random I/O in parallel on null_blk (configured
with 0 latency), the throughput of BFQ grows from 310 to 400 KIOPS
(+30%). We have measured similar or even much higher boosts with other
CPUs: e.g., +45% with an ARM Cortex-A53 octa-core. Our results have
been obtained and can be reproduced very easily with the script in [1].

[1] https://www.spinics.net/lists/linux-block/msg18943.html

Suggested-by: Tejun Heo
Suggested-by: Ulf Hansson
Tested-by: Lee Tibbert
Tested-by: Oleksandr Natalenko
Signed-off-by: Luca Miccio
Signed-off-by: Paolo Valente
Signed-off-by: Jens Axboe
---
 block/bfq-cgroup.c  | 148 +++++++++++++++++++++++++++++-----------------------
 block/bfq-iosched.c |  14 ++---
 block/bfq-iosched.h |   4 +-
 3 files changed, 93 insertions(+), 73 deletions(-)

(limited to 'block')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index ceefb9a706d6..da1525ec4c87 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -24,7 +24,7 @@
 
 #include "bfq-iosched.h"
 
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 
 /* bfqg stats flags */
 enum bfqg_stats_flags {
@@ -152,6 +152,57 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
 	bfqg_stats_update_group_wait_time(stats);
 }
 
+void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
+			      unsigned int op)
+{
+	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
+	bfqg_stats_end_empty_time(&bfqg->stats);
+	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
+		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
+}
+
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
+{
+	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
+}
+
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
+{
+	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
+}
+
+void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
+				  uint64_t io_start_time, unsigned int op)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+	unsigned long long now = sched_clock();
+
+	if (time_after64(now, io_start_time))
+		blkg_rwstat_add(&stats->service_time, op,
+				now - io_start_time);
+	if (time_after64(io_start_time, start_time))
+		blkg_rwstat_add(&stats->wait_time, op,
+				io_start_time - start_time);
+}
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
+
+void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
+			      unsigned int op) { }
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
+void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
+				  uint64_t io_start_time, unsigned int op) { }
+void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
+void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
+void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
+void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
+void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
+
+#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
 /*
  * blk-cgroup policy-related handlers
  * The following functions help in converting between blk-cgroup
@@ -229,42 +280,10 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg)
 	blkg_put(bfqg_to_blkg(bfqg));
 }
 
-void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op)
-{
-	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
-	bfqg_stats_end_empty_time(&bfqg->stats);
-	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
-		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
-}
-
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
-{
-	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
-}
-
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
-{
-	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
-}
-
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op)
-{
-	struct bfqg_stats *stats = &bfqg->stats;
-	unsigned long long now = sched_clock();
-
-	if (time_after64(now, io_start_time))
-		blkg_rwstat_add(&stats->service_time, op,
-				now - io_start_time);
-	if (time_after64(io_start_time, start_time))
-		blkg_rwstat_add(&stats->wait_time, op,
-				io_start_time - start_time);
-}
-
 /* @stats = 0 */
 static void bfqg_stats_reset(struct bfqg_stats *stats)
 {
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* queued stats shouldn't be cleared */
 	blkg_rwstat_reset(&stats->merged);
 	blkg_rwstat_reset(&stats->service_time);
@@ -276,6 +295,7 @@ static void bfqg_stats_reset(struct bfqg_stats *stats)
 	blkg_stat_reset(&stats->group_wait_time);
 	blkg_stat_reset(&stats->idle_time);
 	blkg_stat_reset(&stats->empty_time);
+#endif
 }
 
 /* @to += @from */
@@ -284,6 +304,7 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
 	if (!to || !from)
 		return;
 
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* queued stats shouldn't be cleared */
 	blkg_rwstat_add_aux(&to->merged, &from->merged);
 	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
@@ -296,6 +317,7 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
 	blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
 	blkg_stat_add_aux(&to->idle_time, &from->idle_time);
 	blkg_stat_add_aux(&to->empty_time, &from->empty_time);
+#endif
 }
 
 /*
@@ -342,6 +364,7 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
 
 static void bfqg_stats_exit(struct bfqg_stats *stats)
 {
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	blkg_rwstat_exit(&stats->merged);
 	blkg_rwstat_exit(&stats->service_time);
 	blkg_rwstat_exit(&stats->wait_time);
@@ -353,10 +376,12 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
 	blkg_stat_exit(&stats->group_wait_time);
 	blkg_stat_exit(&stats->idle_time);
 	blkg_stat_exit(&stats->empty_time);
+#endif
 }
 
 static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
 {
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	if (blkg_rwstat_init(&stats->merged, gfp) ||
 	    blkg_rwstat_init(&stats->service_time, gfp) ||
 	    blkg_rwstat_init(&stats->wait_time, gfp) ||
@@ -371,6 +396,7 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
 		bfqg_stats_exit(stats);
 		return -ENOMEM;
 	}
+#endif
 
 	return 0;
 }
@@ -887,6 +913,7 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
 	return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
 }
 
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 static int bfqg_print_stat(struct seq_file *sf, void *v)
 {
 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
@@ -991,6 +1018,7 @@ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
 			  0, false);
 	return 0;
 }
+#endif /* CONFIG_DEBUG_BLK_CGROUP */
 
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
 {
@@ -1028,15 +1056,6 @@ struct cftype bfq_blkcg_legacy_files[] = {
 	},
 
 	/* statistics, covers only the tasks in the bfqg */
-	{
-		.name = "bfq.time",
-		.private = offsetof(struct bfq_group, stats.time),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.sectors",
-		.seq_show = bfqg_print_stat_sectors,
-	},
 	{
 		.name = "bfq.io_service_bytes",
 		.private = (unsigned long)&blkcg_policy_bfq,
@@ -1047,6 +1066,16 @@ struct cftype bfq_blkcg_legacy_files[] = {
 		.private = (unsigned long)&blkcg_policy_bfq,
 		.seq_show = blkg_print_stat_ios,
 	},
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	{
+		.name = "bfq.time",
+		.private = offsetof(struct bfq_group, stats.time),
+		.seq_show = bfqg_print_stat,
+	},
+	{
+		.name = "bfq.sectors",
+		.seq_show = bfqg_print_stat_sectors,
+	},
 	{
 		.name = "bfq.io_service_time",
 		.private = offsetof(struct bfq_group, stats.service_time),
@@ -1067,17 +1096,9 @@ struct cftype bfq_blkcg_legacy_files[] = {
 		.private = offsetof(struct bfq_group, stats.queued),
 		.seq_show = bfqg_print_rwstat,
 	},
+#endif /* CONFIG_DEBUG_BLK_CGROUP */
 
 	/* the same statictics which cover the bfqg and its descendants */
-	{
-		.name = "bfq.time_recursive",
-		.private = offsetof(struct bfq_group, stats.time),
-		.seq_show = bfqg_print_stat_recursive,
-	},
-	{
-		.name = "bfq.sectors_recursive",
-		.seq_show = bfqg_print_stat_sectors_recursive,
-	},
 	{
 		.name = "bfq.io_service_bytes_recursive",
 		.private = (unsigned long)&blkcg_policy_bfq,
@@ -1088,6 +1109,16 @@ struct cftype bfq_blkcg_legacy_files[] = {
 		.private = (unsigned long)&blkcg_policy_bfq,
 		.seq_show = blkg_print_stat_ios_recursive,
 	},
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	{
+		.name = "bfq.time_recursive",
+		.private = offsetof(struct bfq_group, stats.time),
+		.seq_show = bfqg_print_stat_recursive,
+	},
+	{
+		.name = "bfq.sectors_recursive",
+		.seq_show = bfqg_print_stat_sectors_recursive,
+	},
 	{
 		.name = "bfq.io_service_time_recursive",
 		.private = offsetof(struct bfq_group, stats.service_time),
@@ -1132,6 +1163,7 @@ struct cftype bfq_blkcg_legacy_files[] = {
 		.private = offsetof(struct bfq_group, stats.dequeue),
 		.seq_show = bfqg_print_stat,
 	},
+#endif /* CONFIG_DEBUG_BLK_CGROUP */
 	{ } /* terminate */
 };
 
@@ -1147,18 +1179,6 @@ struct cftype bfq_blkg_files[] = {
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED */
 
-void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op) { }
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op) { }
-void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
-void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
-void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
-void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
-void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
-
 void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		   struct bfq_group *bfqg) {}
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 69e05f861daf..bcb6d21baf12 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3693,14 +3693,14 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
 	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
 	struct request *rq;
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	struct bfq_queue *in_serv_queue, *bfqq;
 	bool waiting_rq, idle_timer_disabled;
 #endif
 
 	spin_lock_irq(&bfqd->lock);
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	in_serv_queue = bfqd->in_service_queue;
 	waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
 
@@ -3714,7 +3714,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 #endif
 	spin_unlock_irq(&bfqd->lock);
 
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	bfqq = rq ? RQ_BFQQ(rq) : NULL;
 	if (!idle_timer_disabled && !bfqq)
 		return rq;
 
@@ -4281,7 +4281,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
 	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	bool idle_timer_disabled = false;
 	unsigned int cmd_flags;
@@ -4304,7 +4304,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4323,7 +4323,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		}
 	}
 
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	/*
 	 * Cache cmd_flags before releasing scheduler lock, because rq
 	 * may disappear afterwards (for example, because of a request
@@ -4333,7 +4333,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 #endif
 	spin_unlock_irq(&bfqd->lock);
 
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	if (!bfqq)
 		return;
 	/*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ac0809c72c98..91c4390903a1 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -689,7 +689,7 @@ enum bfqq_expiration {
 };
 
 struct bfqg_stats {
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 	/* number of ios merged */
 	struct blkg_rwstat merged;
 	/* total time spent on device in ns, may not be accurate w/ queueing */
@@ -717,7 +717,7 @@ struct bfqg_stats {
 	uint64_t start_idle_time;
 	uint64_t start_empty_time;
 	uint16_t flags;
-#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
 };
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
--
cgit
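The mechanism the commit above relies on is a common kernel idiom: when a
config option is off, the stats helpers collapse into empty stubs, so call
sites stay unconditional and the cost disappears at compile time. Below is
a minimal standalone illustration in plain C with invented names; it is a
sketch of the idiom, not the kernel code.

#include <stdio.h>

/* #define DEBUG_STATS */	/* stands in for CONFIG_DEBUG_BLK_CGROUP */

#ifdef DEBUG_STATS
static long nr_merged;
static void stats_update_io_merged(void) { nr_merged++; }
static void stats_report(void) { printf("merged: %ld\n", nr_merged); }
#else
/* Empty stubs: every call site compiles down to nothing. */
static void stats_update_io_merged(void) { }
static void stats_report(void) { printf("debug stats disabled\n"); }
#endif

int main(void)
{
	stats_update_io_merged();	/* callers need no #ifdef */
	stats_report();
	return 0;
}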
From 34d9715ac1edd50285168dd8d80c972739a4f6a4 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 16 Nov 2017 08:08:44 +0800
Subject: block: wake up all tasks blocked in get_request()

Once blk_set_queue_dying() is done in blk_cleanup_queue(), we call
blk_freeze_queue() and wait for q->q_usage_counter to become zero. But
if there are tasks blocked in get_request(), q->q_usage_counter can
never become zero. So we have to wake up all these tasks in
blk_set_queue_dying() first.

Fixes: 3ef28e83ab157997 ("block: generic request_queue reference counting")
Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 7c54c195e79e..1038706edd87 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -637,8 +637,8 @@ void blk_set_queue_dying(struct request_queue *q)
 		spin_lock_irq(q->queue_lock);
 		blk_queue_for_each_rl(rl, q) {
 			if (rl->rq_pool) {
-				wake_up(&rl->wait[BLK_RW_SYNC]);
-				wake_up(&rl->wait[BLK_RW_ASYNC]);
+				wake_up_all(&rl->wait[BLK_RW_SYNC]);
+				wake_up_all(&rl->wait[BLK_RW_ASYNC]);
 			}
 		}
 		spin_unlock_irq(q->queue_lock);
--
cgit
From 62530ed8b1d07a45dec94d46e521c0c6c2d476e6 Mon Sep 17 00:00:00 2001
From: Michael Lyle
Date: Thu, 16 Nov 2017 23:47:25 -0800
Subject: bio: ensure __bio_clone_fast copies bi_partno

Commit 74d46992e0d9 introduced a new field, bi_partno, instead of
using bdev->bd_contains and encoding the partition information in the
bi_bdev field. __bio_clone_fast was changed to copy the disk
information, but not the partition information. At minimum, this
regressed bcache and caused data corruption.

Signed-off-by: Michael Lyle
Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index")
Reported-by: Pavel Goran
Reported-by: Campbell Steven
Reviewed-by: Coly Li
Reviewed-by: Ming Lei
Cc: # 4.14
Signed-off-by: Jens Axboe
---
 block/bio.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index b94a802f8ba3..459cc857f3d9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -597,6 +597,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	 * so we don't set nor calculate new physical/hw segment counts here
 	 */
 	bio->bi_disk = bio_src->bi_disk;
+	bio->bi_partno = bio_src->bi_partno;
 	bio_set_flag(bio, BIO_CLONED);
 	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_write_hint = bio_src->bi_write_hint;
--
cgit
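To see concretely why the last fix matters, consider a clone helper that
copies the disk pointer but not the partition index: the clone silently
retargets the whole device instead of the intended partition. The sketch
below models the bug and the one-line fix with a simplified userspace
struct; it is illustrative only and is not the kernel's struct bio.

#include <assert.h>
#include <stdio.h>

struct disk { const char *name; };

struct bio {
	struct disk *bi_disk;
	int bi_partno;			/* partition index within bi_disk */
};

static void bio_clone_fast(struct bio *dst, const struct bio *src)
{
	dst->bi_disk = src->bi_disk;
	dst->bi_partno = src->bi_partno;	/* the line the fix adds */
}

int main(void)
{
	struct disk sda = { "sda" };
	struct bio src = { &sda, 2 };		/* I/O aimed at partition 2 */
	struct bio clone = { NULL, 0 };

	bio_clone_fast(&clone, &src);
	assert(clone.bi_partno == src.bi_partno);	/* fails without the fix */
	printf("clone targets %s partition %d\n",
	       clone.bi_disk->name, clone.bi_partno);
	return 0;
}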