summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2024-01-23 18:58:26 +0100
committerJens Axboe <axboe@kernel.dk>2024-02-06 09:44:03 -0700
commitf814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 (patch)
treef423245937edd1e7cf7be59f948d04c8c66ee956
parentf3c89983cb4fc00be64eb0d5cbcfcdf2cacb965e (diff)
blk-wbt: Fix detection of dirty-throttled tasks
The detection of dirty-throttled tasks in blk-wbt has been subtly broken since its beginning in 2016. Namely if we are doing cgroup writeback and the throttled task is not in the root cgroup, balance_dirty_pages() will set dirty_sleep for the non-root bdi_writeback structure. However blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback structure. Thus detection of recently throttled tasks is not working in this case (we noticed this when we switched to cgroup v2 and suddently writeback was slow). Since blk-wbt has no easy way to get to proper bdi_writeback and furthermore its intention has always been to work on the whole device rather than on individual cgroups, just move the dirty_sleep timestamp from bdi_writeback to backing_dev_info. That fixes the checking for recently throttled task and saves memory for everybody as a bonus. CC: stable@vger.kernel.org Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()") Signed-off-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz [axboe: fixup indentation errors] Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-wbt.c4
-rw-r--r--include/linux/backing-dev-defs.h7
-rw-r--r--mm/backing-dev.c2
-rw-r--r--mm/page-writeback.c2
4 files changed, 9 insertions, 6 deletions
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 5ba3cd574eac..0c0e270a8265 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -163,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
- return time_before(jiffies, wb->dirty_sleep + HZ);
+ return time_before(jiffies, bdi->last_bdp_sleep + HZ);
}
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index ae12696ec492..2ad261082bba 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -141,8 +141,6 @@ struct bdi_writeback {
struct delayed_work dwork; /* work item used for writeback */
struct delayed_work bw_dwork; /* work item used for bandwidth estimate */
- unsigned long dirty_sleep; /* last wait */
-
struct list_head bdi_node; /* anchored at bdi->wb_list */
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -179,6 +177,11 @@ struct backing_dev_info {
* any dirty wbs, which is depended upon by bdi_has_dirty().
*/
atomic_long_t tot_write_bandwidth;
+ /*
+ * Jiffies when last process was dirty throttled on this bdi. Used by
+ * blk-wbt.
+ */
+ unsigned long last_bdp_sleep;
struct bdi_writeback wb; /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1e3447bccdb1..e039d05304dd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
- wb->dirty_sleep = jiffies;
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
@@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
+ bdi->last_bdp_sleep = jiffies;
return cgwb_bdi_init(bdi);
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index cd4e4ae77c40..cc37fa7f3364 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1921,7 +1921,7 @@ pause:
break;
}
__set_current_state(TASK_KILLABLE);
- wb->dirty_sleep = now;
+ bdi->last_bdp_sleep = jiffies;
io_schedule_timeout(pause);
current->dirty_paused_when = now + pause;