From a8394090a9129b40f9d90dcb7f4a49d60c727ca6 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:56 +0800 Subject: bcache: correct cache_dirty_target in __update_writeback_rate() __update_write_rate() uses a Proportion-Differentiation Controller algorithm to control writeback rate. A dirty target number is used in this PD controller to control writeback rate. A larger target number will make the writeback rate smaller, on the versus, a smaller target number will make the writeback rate larger. bcache uses the following steps to calculate the target number, 1) cache_sectors = all-buckets-of-cache-set * buckets-size 2) cache_dirty_target = cache_sectors * cached-device-writeback_percent 3) target = cache_dirty_target * (sectors-of-cached-device/sectors-of-all-cached-devices-of-this-cache-set) The calculation at step 1) for cache_sectors is incorrect, which does not consider dirty blocks occupied by flash only volume. A flash only volume can be took as a bcache device without cached device. All data sectors allocated for it are persistent on cache device and marked dirty, they are not touched by bcache writeback and garbage collection code. So data blocks of flash only volume should be ignore when calculating cache_sectors of cache set. Current code does not subtract dirty sectors of flash only volume, which results a larger target number from the above 3 steps. And in sequence the cache device's writeback rate is smaller then a correct value, writeback speed is slower on all cached devices. This patch fixes the incorrect slower writeback rate by subtracting dirty sectors of flash only volumes in __update_writeback_rate(). (Commit log composed by Coly Li to pass checkpatch.pl checking) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/bcache/writeback.c') diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c49022a8dc9d..5abe6472b66b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -21,7 +21,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); -- cgit From 9baf30972b5568d8b5bc8b3c46a6ec5b58100463 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:59 +0800 Subject: bcache: fix for gc and write-back race gc and write-back get raced (see the email "bcache get stucked" I sended before): gc thread write-back thread | |bch_writeback_thread() |bch_gc_thread() | | |==>read_dirty() |==>bch_btree_gc() | |==>btree_root() //get btree root | | //node write locker | |==>bch_btree_gc_root() | | |==>read_dirty_submit() | |==>write_dirty() | |==>continue_at(cl, | | write_dirty_finish, | | system_wq); | |==>write_dirty_finish()//excute | | //in system_wq | |==>bch_btree_insert() | |==>bch_btree_map_leaf_nodes() | |==>__bch_btree_map_nodes() | |==>btree_root //try to get btree | | //root node read | | //lock | |-----stuck here |==>bch_btree_set_root() |==>bch_journal_meta() |==>bch_journal() |==>journal_try_write() |==>journal_write_unlocked() //journal_full(&c->journal) | //condition satisfied |==>continue_at(cl, journal_write, system_wq); //try to excute | //journal_write in system_wq | //but work queue is excuting | //write_dirty_finish() |==>closure_sync(); //wait journal_write execute | //over and wake up gc, |-------------stuck here |==>release root node write locker This patch alloc a separate work-queue for write-back thread to avoid such race. (Commit log re-organized by Coly Li to pass checkpatch.pl checking) Signed-off-by: Tang Junhui Acked-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/md/bcache/writeback.c') diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 5abe6472b66b..34131439e28c 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -187,7 +187,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -207,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -516,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) -- cgit From 175206cf9ab63161dec74d9cd7f9992e062491f5 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Thu, 7 Sep 2017 01:28:53 +0800 Subject: bcache: initialize dirty stripes in flash_dev_run() bcache uses a Proportion-Differentiation Controller algorithm to control writeback rate to cached devices. In the PD controller algorithm, dirty stripes of thin flash device should not be counted in, because flash only volumes never write back dirty data. Currently dirty stripe counter for thin flash device is not initialized when the thin flash device starts. Which means the following calculation in PD controller will reference an undefined dirty stripes number, and all cached devices attached to the same cache set where the thin flash device lies on may have an inaccurate writeback rate. This patch calles bch_sectors_dirty_init() in flash_dev_run(), to correctly initialize dirty stripe counter when the thin flash device starts to run. This patch also does following parameter data type change, -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); to call this function conveniently in flash_dev_run(). (Commit log is composed by Coly Li) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/md/bcache/writeback.c') diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 34131439e28c..e663ca082183 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -482,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) -- cgit