From b817525a4a80c04e4ca44192d97a1ffa9f2be572 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 2 Oct 2015 14:47:05 -0400 Subject: writeback: bdi_writeback iteration must not skip dying ones bdi_for_each_wb() is used in several places to wake up or issue writeback work items to all wb's (bdi_writeback's) on a given bdi. The iteration is performed by walking bdi->cgwb_tree; however, the tree only indexes wb's which are currently active. For example, when a memcg gets associated with a different blkcg, the old wb is removed from the tree so that the new one can be indexed. The old wb starts dying from then on but will linger till all its inodes are drained. As these dying wb's may still host dirty inodes, writeback operations which affect all wb's must include them. bdi_for_each_wb() skipping dying wb's led to sync(2) missing and failing to sync the inodes belonging to those wb's. This patch adds a RCU protected @bdi->wb_list which lists all wb's beloinging to that bdi. wb's are added on creation and removed on release rather than on the start of destruction. bdi_for_each_wb() usages are replaced with list_for_each[_continue]_rcu() iterations over @bdi->wb_list and bdi_for_each_wb() and its helpers are removed. v2: Updated as per Jan. last_wb ref leak in bdi_split_work_to_wbs() fixed and unnecessary list head severing in cgwb_bdi_destroy() removed. Signed-off-by: Tejun Heo Reported-and-tested-by: Artem Bityutskiy Fixes: ebe41ab0c79d ("writeback: implement bdi_for_each_wb()") Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com Cc: Jan Kara Signed-off-by: Jens Axboe --- mm/backing-dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2df8ddcb0ca0..e92d77937fd3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -480,6 +480,10 @@ static void cgwb_release_workfn(struct work_struct *work) release_work); struct backing_dev_info *bdi = wb->bdi; + spin_lock_irq(&cgwb_lock); + list_del_rcu(&wb->bdi_node); + spin_unlock_irq(&cgwb_lock); + wb_shutdown(wb); css_put(wb->memcg_css); @@ -575,6 +579,7 @@ static int cgwb_create(struct backing_dev_info *bdi, ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb); if (!ret) { atomic_inc(&bdi->usage_cnt); + list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); css_get(memcg_css); @@ -764,15 +769,22 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } int bdi_init(struct backing_dev_info *bdi) { + int ret; + bdi->dev = NULL; bdi->min_ratio = 0; bdi->max_ratio = 100; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); + INIT_LIST_HEAD(&bdi->wb_list); init_waitqueue_head(&bdi->wb_waitq); - return cgwb_bdi_init(bdi); + ret = cgwb_bdi_init(bdi); + + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); + + return ret; } EXPORT_SYMBOL(bdi_init); -- cgit