summaryrefslogtreecommitdiff
path: root/block/bfq-cgroup.c
diff options
context:
space:
mode:
author Paolo Valente <paolo.valente@linaro.org> 2023-01-03 15:54:56 +0100
committer Jens Axboe <axboe@kernel.dk> 2023-01-29 15:18:32 -0700
commit 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d (patch)
tree 6d7979698eebb57245a94a49786b8b3e0686585a /block/bfq-cgroup.c
parent 6d796c50f84ca79f1722bb131799e5a5710c4700 (diff)
block, bfq: split sync bfq_queues on a per-actuator basis
Single-LUN multi-actuator SCSI drives, as well as all multi-actuator SATA drives, appear as a single device to the I/O subsystem [1]. Yet they address commands to different actuators internally, as a function of Logical Block Addresses (LBAs). A given sector is reachable by only one of the actuators. For example, Seagate’s Serial Advanced Technology Attachment (SATA) version contains two actuators and maps the lower half of the SATA LBA space to the lower actuator and the upper half to the upper actuator. Evidently, to fully utilize actuators, no actuator must be left idle or underutilized while there is pending I/O for it. The block layer must somehow control the load of each actuator individually. This commit lays the groundwork for allowing BFQ to provide such per-actuator control. BFQ associates an I/O-request sync bfq_queue with each process doing synchronous I/O, or with a group of processes, in case of queue merging. Then BFQ serves one bfq_queue at a time. While in service, a bfq_queue is emptied in request-position order. Yet the same process, or group of processes, may generate I/O for different actuators. In this case, different streams of I/O (each for a different actuator) all get inserted into the same sync bfq_queue. So there is basically no individual control over when each stream is served, i.e., over when the I/O requests of the stream are picked from the bfq_queue and dispatched to the drive. This commit enables BFQ to control the service of each actuator individually for synchronous I/O, by simply splitting each sync bfq_queue into N queues, one for each actuator. In other words, a sync bfq_queue is now associated with a pair (process, actuator). As a consequence of this split, the per-queue proportional-share policy implemented by BFQ will guarantee that the sync I/O generated for each actuator, by each process, receives its fair share of service. This is just a preparatory patch. 
If the I/O of the same process happens to be sent to different queues, then each of these queues may undergo queue merging. To handle this event, the bfq_io_cq data structure must be properly extended. In addition, stable merging must be disabled to avoid loss of control on individual actuators. Finally, async queues must also be split. These issues are described in detail and addressed in the next commits. As for this commit, although multiple per-process bfq_queues are provided, the I/O of each process or group of processes is still sent to only one queue, regardless of the actuator the I/O is for. The forwarding to distinct bfq_queues will be enabled after addressing the above issues. [1] https://www.linaro.org/blog/budget-fair-queueing-bfq-linux-io-scheduler-optimizations-for-multi-actuator-sata-hard-drives/ Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com> Signed-off-by: Gabriele Felici <felicigb@gmail.com> Signed-off-by: Carmine Zaccagnino <carmine@carminezacc.com> Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Link: https://lore.kernel.org/r/20230103145503.71712-2-paolo.valente@linaro.org Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/bfq-cgroup.c')
-rw-r--r-- block/bfq-cgroup.c 91
1 files changed, 49 insertions, 42 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 7d9b15f0dbd5..5f081f4d51fb 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -712,6 +712,46 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_put_queue(bfqq);
}
+static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
+ struct bfq_queue *sync_bfqq,
+ struct bfq_io_cq *bic,
+ struct bfq_group *bfqg,
+ unsigned int act_idx)
+{
+ struct bfq_queue *bfqq;
+
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ return;
+ }
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data != &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is not valid
+ * anymore. We cannot easily just cancel the merge (by
+ * clearing new_bfqq) as there may be other processes
+ * using this queue and holding refs to all queues
+ * below sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from bfqq now
+ * so that we cannot merge bio to a request from the
+ * old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, true, act_idx);
+ }
+}
+
/**
* __bfq_bic_change_cgroup - move @bic to @bfqg.
* @bfqd: the queue descriptor.
@@ -726,53 +766,20 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
struct bfq_group *bfqg)
{
- struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false);
- struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true);
- struct bfq_entity *entity;
+ unsigned int act_idx;
- if (async_bfqq) {
- entity = &async_bfqq->entity;
+ for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);
- if (entity->sched_data != &bfqg->sched_data) {
- bic_set_bfqq(bic, NULL, false);
+ if (async_bfqq &&
+ async_bfqq->entity.sched_data != &bfqg->sched_data) {
+ bic_set_bfqq(bic, NULL, false, act_idx);
bfq_release_process_ref(bfqd, async_bfqq);
}
- }
- if (sync_bfqq) {
- if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
- /* We are the only user of this bfqq, just move it */
- if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
- } else {
- struct bfq_queue *bfqq;
-
- /*
- * The queue was merged to a different queue. Check
- * that the merge chain still belongs to the same
- * cgroup.
- */
- for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
- if (bfqq->entity.sched_data !=
- &bfqg->sched_data)
- break;
- if (bfqq) {
- /*
- * Some queue changed cgroup so the merge is
- * not valid anymore. We cannot easily just
- * cancel the merge (by clearing new_bfqq) as
- * there may be other processes using this
- * queue and holding refs to all queues below
- * sync_bfqq->new_bfqq. Similarly if the merge
- * already happened, we need to detach from
- * bfqq now so that we cannot merge bio to a
- * request from the old cgroup.
- */
- bfq_put_cooperator(sync_bfqq);
- bfq_release_process_ref(bfqd, sync_bfqq);
- bic_set_bfqq(bic, NULL, true);
- }
- }
+ if (sync_bfqq)
+ bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx);
}
}