From fb4100ae7f312c3d614b37621c2b17b3b7cf65f8 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 May 2015 10:30:32 +0100 Subject: dm cache: fix race when issuing a POLICY_REPLACE operation There is a race between a policy deciding to replace a cache entry, the core target writing back any dirty data from this block, and other IO threads doing IO to the same block. This sort of problem is avoided most of the time by the core target grabbing a bio prison cell before making the request to the policy. But for a demotion the core target doesn't know which block will be demoted, so can't do this in advance. Fix this demotion race by introducing a callback to the policy interface that allows the policy to grab the cell on behalf of the core target. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-cache-target.c | 58 +++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 41b2594a80c6..d5982480630b 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1439,16 +1439,43 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) &cache->stats.read_miss : &cache->stats.write_miss); } +/*----------------------------------------------------------------*/ + +struct old_oblock_lock { + struct policy_locker locker; + struct cache *cache; + struct prealloc *structs; + struct dm_bio_prison_cell *cell; +}; + +static int null_locker(struct policy_locker *locker, dm_oblock_t b) +{ + /* This should never be called */ + BUG(); + return 0; +} + +static int cell_locker(struct policy_locker *locker, dm_oblock_t b) +{ + struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker); + struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs); + + return bio_detain(l->cache, b, NULL, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + l->structs, &l->cell); +} + static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { int r; bool release_cell = true; dm_oblock_t block = get_bio_block(cache, bio); - struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; struct policy_result lookup_result; bool passthrough = passthrough_mode(&cache->features); bool discarded_block, can_migrate; + struct old_oblock_lock ool; /* * Check to see if that block is currently migrating. @@ -1463,8 +1490,12 @@ static void process_bio(struct cache *cache, struct prealloc *structs, discarded_block = is_discarded_oblock(cache, block); can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + ool.locker.fn = cell_locker; + ool.cache = cache; + ool.structs = structs; + ool.cell = NULL; r = policy_map(cache->policy, block, true, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) /* migration has been denied */ @@ -1521,27 +1552,11 @@ static void process_bio(struct cache *cache, struct prealloc *structs, break; case POLICY_REPLACE: - cell_prealloc = prealloc_get_cell(structs); - r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, - (cell_free_fn) prealloc_put_cell, - structs, &old_ocell); - if (r > 0) { - /* - * We have to be careful to avoid lock inversion of - * the cells. So we back off, and wait for the - * old_ocell to become free. - */ - policy_force_mapping(cache->policy, block, - lookup_result.old_oblock); - atomic_inc(&cache->stats.cache_cell_clash); - break; - } atomic_inc(&cache->stats.demotion); atomic_inc(&cache->stats.promotion); - demote_then_promote(cache, structs, lookup_result.old_oblock, block, lookup_result.cblock, - old_ocell, new_ocell); + ool.cell, new_ocell); release_cell = false; break; @@ -2589,6 +2604,9 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso bool discarded_block; struct policy_result lookup_result; struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); + struct old_oblock_lock ool; + + ool.locker.fn = null_locker; if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* @@ -2627,7 +2645,7 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso discarded_block = is_discarded_oblock(cache, block); r = policy_map(cache->policy, block, false, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) { cell_defer(cache, *cell, true); return DM_MAPIO_SUBMITTED; -- cgit From 77289d32073c4eac57fcca2abe6caefc6f3dc7d6 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 15 May 2015 13:45:30 +0100 Subject: dm cache: add io_tracker A little class that keeps track of the volume of io that is in flight, and the length of time that a device has been idle for. FIXME: rather than jiffes, may be best to use ktime_t (to support faster devices). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index d5982480630b..6f9bdd1bf7c4 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -25,6 +25,79 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle, /*----------------------------------------------------------------*/ +#define IOT_RESOLUTION 4 + +struct io_tracker { + spinlock_t lock; + + /* + * Sectors of in-flight IO. + */ + sector_t in_flight; + + /* + * The time, in jiffies, when this device became idle (if it is + * indeed idle). + */ + unsigned long idle_time; + unsigned long last_update_time; +}; + +static void iot_init(struct io_tracker *iot) +{ + spin_lock_init(&iot->lock); + iot->in_flight = 0ul; + iot->idle_time = 0ul; + iot->last_update_time = jiffies; +} + +static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs) +{ + if (iot->in_flight) + return false; + + return time_after(jiffies, iot->idle_time + jifs); +} + +static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&iot->lock, flags); + r = __iot_idle_for(iot, jifs); + spin_unlock_irqrestore(&iot->lock, flags); + + return r; +} + +static void iot_io_begin(struct io_tracker *iot, sector_t len) +{ + unsigned long flags; + + spin_lock_irqsave(&iot->lock, flags); + iot->in_flight += len; + spin_unlock_irqrestore(&iot->lock, flags); +} + +static void __iot_io_end(struct io_tracker *iot, sector_t len) +{ + iot->in_flight -= len; + if (!iot->in_flight) + iot->idle_time = jiffies; +} + +static void iot_io_end(struct io_tracker *iot, sector_t len) +{ + unsigned long flags; + + spin_lock_irqsave(&iot->lock, flags); + __iot_io_end(iot, len); + spin_unlock_irqrestore(&iot->lock, flags); +} + +/*----------------------------------------------------------------*/ + /* * Glossary: * -- cgit From 066dbaa386c751164c39ab025e5e8803b4a4d691 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 15 May 2015 15:18:01 +0100 Subject: dm cache: track IO to the origin device using io_tracker Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 56 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 6f9bdd1bf7c4..940c7b2b5ab4 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -355,6 +355,8 @@ struct cache { */ spinlock_t invalidation_lock; struct list_head invalidation_requests; + + struct io_tracker origin_tracker; }; struct per_bio_data { @@ -362,6 +364,7 @@ struct per_bio_data { unsigned req_nr:2; struct dm_deferred_entry *all_io_entry; struct dm_hook_info hook_info; + sector_t len; /* * writethrough fields. These MUST remain at the end of this @@ -768,6 +771,7 @@ static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) pb->tick = false; pb->req_nr = dm_bio_get_target_bio_nr(bio); pb->all_io_entry = NULL; + pb->len = 0; return pb; } @@ -865,12 +869,43 @@ static void inc_ds(struct cache *cache, struct bio *bio, pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); } +static bool accountable_bio(struct cache *cache, struct bio *bio) +{ + return ((bio->bi_bdev == cache->origin_dev->bdev) && + !(bio->bi_rw & REQ_DISCARD)); +} + +static void accounted_begin(struct cache *cache, struct bio *bio) +{ + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + + if (accountable_bio(cache, bio)) { + pb->len = bio_sectors(bio); + iot_io_begin(&cache->origin_tracker, pb->len); + } +} + +static void accounted_complete(struct cache *cache, struct bio *bio) +{ + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + + iot_io_end(&cache->origin_tracker, pb->len); +} + +static void accounted_request(struct cache *cache, struct bio *bio) +{ + accounted_begin(cache, bio); + generic_make_request(bio); +} + static void issue(struct cache *cache, struct bio *bio) { unsigned long flags; if (!bio_triggers_commit(cache, bio)) { - generic_make_request(bio); + accounted_request(cache, bio); return; } @@ -1166,7 +1201,7 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) * No need to inc_ds() here, since the cell will be held for the * duration of the io. */ - generic_make_request(bio); + accounted_request(mg->cache, bio); } static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) @@ -1722,7 +1757,7 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) * These bios have already been through inc_ds() */ while ((bio = bio_list_pop(&bios))) - submit_bios ? generic_make_request(bio) : bio_io_error(bio); + submit_bios ? accounted_request(cache, bio) : bio_io_error(bio); } static void process_deferred_writethrough_bios(struct cache *cache) @@ -1742,7 +1777,7 @@ static void process_deferred_writethrough_bios(struct cache *cache) * These bios have already been through inc_ds() */ while ((bio = bio_list_pop(&bios))) - generic_make_request(bio); + accounted_request(cache, bio); } static void writeback_some_dirty_blocks(struct cache *cache) @@ -2602,6 +2637,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) spin_lock_init(&cache->invalidation_lock); INIT_LIST_HEAD(&cache->invalidation_requests); + iot_init(&cache->origin_tracker); + *result = cache; return 0; @@ -2791,9 +2828,13 @@ static int cache_map(struct dm_target *ti, struct bio *bio) struct cache *cache = ti->private; r = __cache_map(cache, bio, &cell); - if (r == DM_MAPIO_REMAPPED && cell) { - inc_ds(cache, bio, cell); - cell_defer(cache, cell, false); + if (r == DM_MAPIO_REMAPPED) { + accounted_begin(cache, bio); + + if (cell) { + inc_ds(cache, bio, cell); + cell_defer(cache, cell, false); + } } return r; @@ -2815,6 +2856,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) } check_for_quiesced_migrations(cache, pb); + accounted_complete(cache, bio); return 0; } -- cgit From 20f6814b94fff4a98b123f1c2b691e936be27aaf Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 15 May 2015 15:20:09 +0100 Subject: dm cache: pass a new 'critical' flag to the policies when requesting writeback work We only allow non critical writeback if the origin is idle. It is up to the policy to decide what writeback work is critical. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 940c7b2b5ab4..5a9cd2c5a359 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1787,6 +1787,7 @@ static void writeback_some_dirty_blocks(struct cache *cache) dm_cblock_t cblock; struct prealloc structs; struct dm_bio_prison_cell *old_ocell; + bool busy = !iot_idle_for(&cache->origin_tracker, HZ); memset(&structs, 0, sizeof(structs)); @@ -1794,7 +1795,7 @@ static void writeback_some_dirty_blocks(struct cache *cache) if (prealloc_data_structs(cache, &structs)) break; - r = policy_writeback_work(cache->policy, &oblock, &cblock); + r = policy_writeback_work(cache->policy, &oblock, &cblock, busy); if (r) break; -- cgit From 451b9e0071b2833744db7f518115bc085bc7b23c Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 15 May 2015 15:22:02 +0100 Subject: dm cache: pull out some bitset utility functions for reuse Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5a9cd2c5a359..5d3b20b91ba3 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -111,30 +111,6 @@ static void iot_io_end(struct io_tracker *iot, sector_t len) /*----------------------------------------------------------------*/ -static size_t bitset_size_in_bytes(unsigned nr_entries) -{ - return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); -} - -static unsigned long *alloc_bitset(unsigned nr_entries) -{ - size_t s = bitset_size_in_bytes(nr_entries); - return vzalloc(s); -} - -static void clear_bitset(void *bitset, unsigned nr_entries) -{ - size_t s = bitset_size_in_bytes(nr_entries); - memset(bitset, 0, s); -} - -static void free_bitset(unsigned long *bits) -{ - vfree(bits); -} - -/*----------------------------------------------------------------*/ - /* * There are a couple of places where we let a bio run, but want to do some * work before calling its endio function. We do this by temporarily -- cgit From 651f5fa2a3959ff5db60c09a84efd66309fe4035 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 15 May 2015 15:26:08 +0100 Subject: dm cache: defer whole cells Currently individual bios are deferred to the worker thread if they cannot be processed immediately (eg, a block is in the process of being moved to the fast device). This patch passes whole cells across to the worker. This saves reaquiring the cell, and also collects bios destined for the same block together, which allows them to be mapped with a single look up to the policy. This reduces the overhead of using dm-cache. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 325 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 262 insertions(+), 63 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5d3b20b91ba3..d2d91c164420 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -257,6 +257,7 @@ struct cache { int sectors_per_block_shift; spinlock_t lock; + struct list_head deferred_cells; struct bio_list deferred_bios; struct bio_list deferred_flush_bios; struct bio_list deferred_writethrough_bios; @@ -969,26 +970,63 @@ static void dec_io_migrations(struct cache *cache) atomic_dec(&cache->nr_io_migrations); } -static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, - bool holder) +static void __cell_release(struct cache *cache, struct dm_bio_prison_cell *cell, + bool holder, struct bio_list *bios) { (holder ? dm_cell_release : dm_cell_release_no_holder) - (cache->prison, cell, &cache->deferred_bios); + (cache->prison, cell, bios); free_prison_cell(cache, cell); } -static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, - bool holder) +static bool discard_or_flush(struct bio *bio) +{ + return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD); +} + +static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) +{ + if (discard_or_flush(cell->holder)) + /* + * We have to handle these bios + * individually. + */ + __cell_release(cache, cell, true, &cache->deferred_bios); + + else + list_add_tail(&cell->user_list, &cache->deferred_cells); +} + +static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder) { unsigned long flags; + if (!holder && dm_cell_promote_or_release(cache->prison, cell)) { + /* + * There was no prisoner to promote to holder, the + * cell has been released. + */ + free_prison_cell(cache, cell); + return; + } + spin_lock_irqsave(&cache->lock, flags); - __cell_defer(cache, cell, holder); + __cell_defer(cache, cell); spin_unlock_irqrestore(&cache->lock, flags); wake_worker(cache); } +static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err) +{ + dm_cell_error(cache->prison, cell, err); + dm_bio_prison_free_cell(cache->prison, cell); +} + +static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell) +{ + cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE); +} + static void free_io_migration(struct dm_cache_migration *mg) { dec_io_migrations(mg->cache); @@ -1525,6 +1563,107 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) /*----------------------------------------------------------------*/ +struct inc_detail { + struct cache *cache; + struct bio_list bios_for_issue; + struct bio_list unhandled_bios; + bool any_writes; +}; + +static void inc_fn(void *context, struct dm_bio_prison_cell *cell) +{ + struct bio *bio; + struct inc_detail *detail = context; + struct cache *cache = detail->cache; + + inc_ds(cache, cell->holder, cell); + if (bio_data_dir(cell->holder) == WRITE) + detail->any_writes = true; + + while ((bio = bio_list_pop(&cell->bios))) { + if (discard_or_flush(bio)) { + bio_list_add(&detail->unhandled_bios, bio); + continue; + } + + if (bio_data_dir(bio) == WRITE) + detail->any_writes = true; + + bio_list_add(&detail->bios_for_issue, bio); + inc_ds(cache, bio, cell); + } +} + +// FIXME: refactor these two +static void remap_cell_to_origin_clear_discard(struct cache *cache, + struct dm_bio_prison_cell *cell, + dm_oblock_t oblock, bool issue_holder) +{ + struct bio *bio; + unsigned long flags; + struct inc_detail detail; + + detail.cache = cache; + bio_list_init(&detail.bios_for_issue); + bio_list_init(&detail.unhandled_bios); + detail.any_writes = false; + + spin_lock_irqsave(&cache->lock, flags); + dm_cell_visit_release(cache->prison, inc_fn, &detail, cell); + bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + remap_to_origin(cache, cell->holder); + if (issue_holder) + issue(cache, cell->holder); + else + accounted_begin(cache, cell->holder); + + if (detail.any_writes) + clear_discard(cache, oblock_to_dblock(cache, oblock)); + + while ((bio = bio_list_pop(&detail.bios_for_issue))) { + remap_to_origin(cache, bio); + issue(cache, bio); + } +} + +static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell, + dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder) +{ + struct bio *bio; + unsigned long flags; + struct inc_detail detail; + + detail.cache = cache; + bio_list_init(&detail.bios_for_issue); + bio_list_init(&detail.unhandled_bios); + detail.any_writes = false; + + spin_lock_irqsave(&cache->lock, flags); + dm_cell_visit_release(cache->prison, inc_fn, &detail, cell); + bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + remap_to_cache(cache, cell->holder, cblock); + if (issue_holder) + issue(cache, cell->holder); + else + accounted_begin(cache, cell->holder); + + if (detail.any_writes) { + set_dirty(cache, oblock, cblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); + } + + while ((bio = bio_list_pop(&detail.bios_for_issue))) { + remap_to_cache(cache, bio, cblock); + issue(cache, bio); + } +} + +/*----------------------------------------------------------------*/ + struct old_oblock_lock { struct policy_locker locker; struct cache *cache; @@ -1549,28 +1688,18 @@ static int cell_locker(struct policy_locker *locker, dm_oblock_t b) l->structs, &l->cell); } -static void process_bio(struct cache *cache, struct prealloc *structs, - struct bio *bio) +static void process_cell(struct cache *cache, struct prealloc *structs, + struct dm_bio_prison_cell *new_ocell) { int r; bool release_cell = true; + struct bio *bio = new_ocell->holder; dm_oblock_t block = get_bio_block(cache, bio); - struct dm_bio_prison_cell *cell_prealloc, *new_ocell; struct policy_result lookup_result; bool passthrough = passthrough_mode(&cache->features); bool discarded_block, can_migrate; struct old_oblock_lock ool; - /* - * Check to see if that block is currently migrating. - */ - cell_prealloc = prealloc_get_cell(structs); - r = bio_detain(cache, block, bio, cell_prealloc, - (cell_free_fn) prealloc_put_cell, - structs, &new_ocell); - if (r > 0) - return; - discarded_block = is_discarded_oblock(cache, block); can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); @@ -1615,9 +1744,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs, remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); inc_and_issue(cache, bio, new_ocell); - } else { - remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); - inc_and_issue(cache, bio, new_ocell); + } else { + remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true); + release_cell = false; } } @@ -1625,8 +1754,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, case POLICY_MISS: inc_miss_counter(cache, bio); - remap_to_origin_clear_discard(cache, bio, block); - inc_and_issue(cache, bio, new_ocell); + remap_cell_to_origin_clear_discard(cache, new_ocell, block, true); + release_cell = false; break; case POLICY_NEW: @@ -1654,10 +1783,30 @@ static void process_bio(struct cache *cache, struct prealloc *structs, cell_defer(cache, new_ocell, false); } +static void process_bio(struct cache *cache, struct prealloc *structs, + struct bio *bio) +{ + int r; + dm_oblock_t block = get_bio_block(cache, bio); + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; + + /* + * Check to see if that block is currently migrating. + */ + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain(cache, block, bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &new_ocell); + if (r > 0) + return; + + process_cell(cache, structs, new_ocell); +} + static int need_commit_due_to_time(struct cache *cache) { - return !time_in_range(jiffies, cache->last_commit_jiffies, - cache->last_commit_jiffies + COMMIT_PERIOD); + return jiffies < cache->last_commit_jiffies || + jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; } static int commit_if_needed(struct cache *cache) @@ -1716,6 +1865,40 @@ static void process_deferred_bios(struct cache *cache) prealloc_free_structs(cache, &structs); } +static void process_deferred_cells(struct cache *cache) +{ + unsigned long flags; + struct dm_bio_prison_cell *cell, *tmp; + struct list_head cells; + struct prealloc structs; + + memset(&structs, 0, sizeof(structs)); + + INIT_LIST_HEAD(&cells); + + spin_lock_irqsave(&cache->lock, flags); + list_splice_init(&cache->deferred_cells, &cells); + spin_unlock_irqrestore(&cache->lock, flags); + + list_for_each_entry_safe(cell, tmp, &cells, user_list) { + /* + * If we've got no free migration structs, and processing + * this bio might require one, we pause until there are some + * prepared mappings to process. + */ + if (prealloc_data_structs(cache, &structs)) { + spin_lock_irqsave(&cache->lock, flags); + list_splice(&cells, &cache->deferred_cells); + spin_unlock_irqrestore(&cache->lock, flags); + break; + } + + process_cell(cache, &structs, cell); + } + + prealloc_free_structs(cache, &structs); +} + static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) { unsigned long flags; @@ -1883,7 +2066,22 @@ static void stop_worker(struct cache *cache) flush_workqueue(cache->wq); } -static void requeue_deferred_io(struct cache *cache) +static void requeue_deferred_cells(struct cache *cache) +{ + unsigned long flags; + struct list_head cells; + struct dm_bio_prison_cell *cell, *tmp; + + INIT_LIST_HEAD(&cells); + spin_lock_irqsave(&cache->lock, flags); + list_splice_init(&cache->deferred_cells, &cells); + spin_unlock_irqrestore(&cache->lock, flags); + + list_for_each_entry_safe(cell, tmp, &cells, user_list) + cell_requeue(cache, cell); +} + +static void requeue_deferred_bios(struct cache *cache) { struct bio *bio; struct bio_list bios; @@ -1904,6 +2102,7 @@ static int more_work(struct cache *cache) !list_empty(&cache->need_commit_migrations); else return !bio_list_empty(&cache->deferred_bios) || + !list_empty(&cache->deferred_cells) || !bio_list_empty(&cache->deferred_flush_bios) || !bio_list_empty(&cache->deferred_writethrough_bios) || !list_empty(&cache->quiesced_migrations) || @@ -1921,6 +2120,7 @@ static void do_worker(struct work_struct *ws) writeback_some_dirty_blocks(cache); process_deferred_writethrough_bios(cache); process_deferred_bios(cache); + process_deferred_cells(cache); process_invalidation_requests(cache); } @@ -1935,6 +2135,7 @@ static void do_worker(struct work_struct *ws) * FIXME: rollback metadata or just go into a * failure mode and error everything */ + } else { process_deferred_flush_bios(cache, true); process_migrations(cache, &cache->need_commit_migrations, @@ -2525,6 +2726,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) } spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->deferred_cells); bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_flush_bios); bio_list_init(&cache->deferred_writethrough_bios); @@ -2682,9 +2884,14 @@ out: return r; } -static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell) +/*----------------------------------------------------------------*/ + +static int cache_map(struct dm_target *ti, struct bio *bio) { + struct cache *cache = ti->private; + int r; + struct dm_bio_prison_cell *cell = NULL; dm_oblock_t block = get_bio_block(cache, bio); size_t pb_data_size = get_per_bio_data_size(cache); bool can_migrate = false; @@ -2702,10 +2909,11 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso * Just remap to the origin and carry on. */ remap_to_origin(cache, bio); + accounted_begin(cache, bio); return DM_MAPIO_REMAPPED; } - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { + if (discard_or_flush(bio)) { defer_bio(cache, bio); return DM_MAPIO_SUBMITTED; } @@ -2713,15 +2921,15 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso /* * Check to see if that block is currently migrating. */ - *cell = alloc_prison_cell(cache); - if (!*cell) { + cell = alloc_prison_cell(cache); + if (!cell) { defer_bio(cache, bio); return DM_MAPIO_SUBMITTED; } - r = bio_detain(cache, block, bio, *cell, + r = bio_detain(cache, block, bio, cell, (cell_free_fn) free_prison_cell, - cache, cell); + cache, &cell); if (r) { if (r < 0) defer_bio(cache, bio); @@ -2734,12 +2942,12 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso r = policy_map(cache->policy, block, false, can_migrate, discarded_block, bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) { - cell_defer(cache, *cell, true); + cell_defer(cache, cell, true); return DM_MAPIO_SUBMITTED; } else if (r) { DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); - cell_defer(cache, *cell, false); + cell_defer(cache, cell, false); bio_io_error(bio); return DM_MAPIO_SUBMITTED; } @@ -2753,21 +2961,30 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso * We need to invalidate this block, so * defer for the worker thread. */ - cell_defer(cache, *cell, true); + cell_defer(cache, cell, true); r = DM_MAPIO_SUBMITTED; } else { inc_miss_counter(cache, bio); remap_to_origin_clear_discard(cache, bio, block); + accounted_begin(cache, bio); + inc_ds(cache, bio, cell); + // FIXME: we want to remap hits or misses straight + // away rather than passing over to the worker. + cell_defer(cache, cell, false); } } else { inc_hit_counter(cache, bio); if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && - !is_dirty(cache, lookup_result.cblock)) + !is_dirty(cache, lookup_result.cblock)) { remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - else - remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + accounted_begin(cache, bio); + inc_ds(cache, bio, cell); + cell_defer(cache, cell, false); + + } else + remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false); } break; @@ -2779,18 +2996,18 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso * longer needed because the block has been demoted. */ bio_endio(bio, 0); - cell_defer(cache, *cell, false); + // FIXME: remap everything as a miss + cell_defer(cache, cell, false); r = DM_MAPIO_SUBMITTED; } else - remap_to_origin_clear_discard(cache, bio, block); - + remap_cell_to_origin_clear_discard(cache, cell, block, false); break; default: DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, (unsigned) lookup_result.op); - cell_defer(cache, *cell, false); + cell_defer(cache, cell, false); bio_io_error(bio); r = DM_MAPIO_SUBMITTED; } @@ -2798,25 +3015,6 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso return r; } -static int cache_map(struct dm_target *ti, struct bio *bio) -{ - int r; - struct dm_bio_prison_cell *cell = NULL; - struct cache *cache = ti->private; - - r = __cache_map(cache, bio, &cell); - if (r == DM_MAPIO_REMAPPED) { - accounted_begin(cache, bio); - - if (cell) { - inc_ds(cache, bio, cell); - cell_defer(cache, cell, false); - } - } - - return r; -} - static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) { struct cache *cache = ti->private; @@ -2913,7 +3111,8 @@ static void cache_postsuspend(struct dm_target *ti) start_quiescing(cache); wait_for_migrations(cache); stop_worker(cache); - requeue_deferred_io(cache); + requeue_deferred_bios(cache); + requeue_deferred_cells(cache); stop_quiescing(cache); (void) sync_metadata(cache); -- cgit From 40775257b97e27305cf5c2425be7acaa6edee4ea Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 15 May 2015 15:29:58 +0100 Subject: dm cache: boost promotion of blocks that will be overwritten When considering whether to move a block to the cache we already give preferential treatment to discarded blocks, since they are cheap to promote (no read of the origin required since the data is junk). The same is true of blocks that are about to be completely overwritten, so we likewise boost their promotion chances. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index d2d91c164420..7829d947ef01 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1697,17 +1697,17 @@ static void process_cell(struct cache *cache, struct prealloc *structs, dm_oblock_t block = get_bio_block(cache, bio); struct policy_result lookup_result; bool passthrough = passthrough_mode(&cache->features); - bool discarded_block, can_migrate; + bool fast_promotion, can_migrate; struct old_oblock_lock ool; - discarded_block = is_discarded_oblock(cache, block); - can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio); + can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache)); ool.locker.fn = cell_locker; ool.cache = cache; ool.structs = structs; ool.cell = NULL; - r = policy_map(cache->policy, block, true, can_migrate, discarded_block, + r = policy_map(cache->policy, block, true, can_migrate, fast_promotion, bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) @@ -2895,7 +2895,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) dm_oblock_t block = get_bio_block(cache, bio); size_t pb_data_size = get_per_bio_data_size(cache); bool can_migrate = false; - bool discarded_block; + bool fast_promotion; struct policy_result lookup_result; struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); struct old_oblock_lock ool; @@ -2937,9 +2937,9 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } - discarded_block = is_discarded_oblock(cache, block); + fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio); - r = policy_map(cache->policy, block, false, can_migrate, discarded_block, + r = policy_map(cache->policy, block, false, can_migrate, fast_promotion, bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) { cell_defer(cache, cell, true); -- cgit From 88bf5184fa5861e766e39fd34fc6d21557ac7be8 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 27 May 2015 15:39:45 +0100 Subject: dm cache: wake the worker thread every time we free a migration object When the cache is idle, writeback work was only being issued every second. With this change outstanding writebacks are streamed constantly. This offers a writeback performance improvement. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7829d947ef01..6d36ed3c46a0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -418,10 +418,13 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) static void free_migration(struct dm_cache_migration *mg) { - if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations)) - wake_up(&mg->cache->migration_wait); + struct cache *cache = mg->cache; + + if (atomic_dec_and_test(&cache->nr_allocated_migrations)) + wake_up(&cache->migration_wait); - mempool_free(mg, mg->cache->migration_pool); + mempool_free(mg, cache->migration_pool); + wake_worker(cache); } static int prealloc_data_structs(struct cache *cache, struct prealloc *p) -- cgit From 028ae9f76f2935e8cf9974bff9a4587e3a995ff3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 22 Apr 2015 16:42:35 -0400 Subject: dm cache: add fail io mode and needs_check flag If a cache metadata operation fails (e.g. transaction commit) the cache's metadata device will abort the current transaction, set a new needs_check flag, and the cache will transition to "read-only" mode. If aborting the transaction or setting the needs_check flag fails the cache will transition to "fail-io" mode. Once needs_check is set the cache device will not be allowed to activate. Activation requires write access to metadata. Future work is needed to add proper support for running the cache in read-only mode. Once in fail-io mode the cache will report a status of "Fail". Also, add commit() wrapper that will disallow commits if in read_only or fail mode. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 204 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 176 insertions(+), 28 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 6d36ed3c46a0..dae0321ebfa9 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -150,12 +150,10 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) #define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT) #define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT) -/* - * FIXME: the cache is read/write for the time being. - */ enum cache_metadata_mode { CM_WRITE, /* metadata may be changed */ CM_READ_ONLY, /* metadata may not be changed */ + CM_FAIL }; enum cache_io_mode { @@ -385,6 +383,8 @@ struct prealloc { struct dm_bio_prison_cell *cell2; }; +static enum cache_metadata_mode get_cache_mode(struct cache *cache); + static void wake_worker(struct cache *cache) { queue_work(cache->wq, &cache->worker); @@ -699,6 +699,9 @@ static void save_stats(struct cache *cache) { struct dm_cache_statistics stats; + if (get_cache_mode(cache) >= CM_READ_ONLY) + return; + stats.read_hits = atomic_read(&cache->stats.read_hit); stats.read_misses = atomic_read(&cache->stats.read_miss); stats.write_hits = atomic_read(&cache->stats.write_hit); @@ -957,6 +960,84 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, remap_to_origin_clear_discard(pb->cache, bio, oblock); } +/*---------------------------------------------------------------- + * Failure modes + *--------------------------------------------------------------*/ +static enum cache_metadata_mode get_cache_mode(struct cache *cache) +{ + return cache->features.mode; +} + +static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode) +{ + const char *descs[] = { + "write", + "read-only", + "fail" + }; + + dm_table_event(cache->ti->table); + DMINFO("switching cache to %s mode", descs[(int)mode]); +} + +static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode) +{ + bool needs_check = dm_cache_metadata_needs_check(cache->cmd); + enum cache_metadata_mode old_mode = get_cache_mode(cache); + + if (new_mode == CM_WRITE && needs_check) { + DMERR("unable to switch cache to write mode until repaired."); + if (old_mode != new_mode) + new_mode = old_mode; + else + new_mode = CM_READ_ONLY; + } + + /* Never move out of fail mode */ + if (old_mode == CM_FAIL) + new_mode = CM_FAIL; + + switch (new_mode) { + case CM_FAIL: + case CM_READ_ONLY: + dm_cache_metadata_set_read_only(cache->cmd); + break; + + case CM_WRITE: + dm_cache_metadata_set_read_write(cache->cmd); + break; + } + + cache->features.mode = new_mode; + + if (new_mode != old_mode) + notify_mode_switch(cache, new_mode); +} + +static void abort_transaction(struct cache *cache) +{ + if (get_cache_mode(cache) >= CM_READ_ONLY) + return; + + if (dm_cache_metadata_set_needs_check(cache->cmd)) { + DMERR("failed to set 'needs_check' flag in metadata"); + set_cache_mode(cache, CM_FAIL); + } + + DMERR_LIMIT("aborting current metadata transaction"); + if (dm_cache_metadata_abort(cache->cmd)) { + DMERR("failed to abort metadata transaction"); + set_cache_mode(cache, CM_FAIL); + } +} + +static void metadata_operation_failed(struct cache *cache, const char *op, int r) +{ + DMERR_LIMIT("metadata operation '%s' failed: error = %d", op, r); + abort_transaction(cache); + set_cache_mode(cache, CM_READ_ONLY); +} + /*---------------------------------------------------------------- * Migration processing * @@ -1063,6 +1144,7 @@ static void migration_failure(struct dm_cache_migration *mg) static void migration_success_pre_commit(struct dm_cache_migration *mg) { + int r; unsigned long flags; struct cache *cache = mg->cache; @@ -1073,8 +1155,10 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) return; } else if (mg->demote) { - if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) { + r = dm_cache_remove_mapping(cache->cmd, mg->cblock); + if (r) { DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); + metadata_operation_failed(cache, "dm_cache_remove_mapping", r); policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); if (mg->promote) @@ -1083,8 +1167,10 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) return; } } else { - if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { + r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock); + if (r) { DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); + metadata_operation_failed(cache, "dm_cache_insert_mapping", r); policy_remove_mapping(cache->policy, mg->new_oblock); free_io_migration(mg); return; @@ -1812,15 +1898,32 @@ static int need_commit_due_to_time(struct cache *cache) jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; } +/* + * A non-zero return indicates read_only or fail_io mode. + */ +static int commit(struct cache *cache, bool clean_shutdown) +{ + int r; + + if (get_cache_mode(cache) >= CM_READ_ONLY) + return -EINVAL; + + atomic_inc(&cache->stats.commit_count); + r = dm_cache_commit(cache->cmd, clean_shutdown); + if (r) + metadata_operation_failed(cache, "dm_cache_commit", r); + + return r; +} + static int commit_if_needed(struct cache *cache) { int r = 0; if ((cache->commit_requested || need_commit_due_to_time(cache)) && dm_cache_changed_this_transaction(cache->cmd)) { - atomic_inc(&cache->stats.commit_count); + r = commit(cache, false); cache->commit_requested = false; - r = dm_cache_commit(cache->cmd, false); cache->last_commit_jiffies = jiffies; } @@ -1988,8 +2091,10 @@ static void process_invalidation_request(struct cache *cache, struct invalidatio r = policy_remove_cblock(cache->policy, to_cblock(begin)); if (!r) { r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); - if (r) + if (r) { + metadata_operation_failed(cache, "dm_cache_remove_mapping", r); break; + } } else if (r == -ENODATA) { /* harmless, already unmapped */ @@ -2133,12 +2238,6 @@ static void do_worker(struct work_struct *ws) if (commit_if_needed(cache)) { process_deferred_flush_bios(cache, false); process_migrations(cache, &cache->need_commit_migrations, migration_failure); - - /* - * FIXME: rollback metadata or just go into a - * failure mode and error everything - */ - } else { process_deferred_flush_bios(cache, true); process_migrations(cache, &cache->need_commit_migrations, @@ -2711,6 +2810,12 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } cache->cmd = cmd; + set_cache_mode(cache, CM_WRITE); + if (get_cache_mode(cache) != CM_WRITE) { + *error = "Unable to get write access to metadata, please check/repair metadata."; + r = -EINVAL; + goto bad; + } if (passthrough_mode(&cache->features)) { bool all_clean; @@ -3043,11 +3148,16 @@ static int write_dirty_bitset(struct cache *cache) { unsigned i, r; + if (get_cache_mode(cache) >= CM_READ_ONLY) + return -EINVAL; + for (i = 0; i < from_cblock(cache->cache_size); i++) { r = dm_cache_set_dirty(cache->cmd, to_cblock(i), is_dirty(cache, to_cblock(i))); - if (r) + if (r) { + metadata_operation_failed(cache, "dm_cache_set_dirty", r); return r; + } } return 0; @@ -3057,18 +3167,40 @@ static int write_discard_bitset(struct cache *cache) { unsigned i, r; + if (get_cache_mode(cache) >= CM_READ_ONLY) + return -EINVAL; + r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, cache->discard_nr_blocks); if (r) { DMERR("could not resize on-disk discard bitset"); + metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r); return r; } for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { r = dm_cache_set_discard(cache->cmd, to_dblock(i), is_discarded(cache, to_dblock(i))); - if (r) + if (r) { + metadata_operation_failed(cache, "dm_cache_set_discard", r); return r; + } + } + + return 0; +} + +static int write_hints(struct cache *cache) +{ + int r; + + if (get_cache_mode(cache) >= CM_READ_ONLY) + return -EINVAL; + + r = dm_cache_write_hints(cache->cmd, cache->policy); + if (r) { + metadata_operation_failed(cache, "dm_cache_write_hints", r); + return r; } return 0; @@ -3091,7 +3223,7 @@ static bool sync_metadata(struct cache *cache) save_stats(cache); - r3 = dm_cache_write_hints(cache->cmd, cache->policy); + r3 = write_hints(cache); if (r3) DMERR("could not write hints"); @@ -3100,9 +3232,9 @@ static bool sync_metadata(struct cache *cache) * set the clean shutdown flag. This will effectively force every * dirty bit to be set on reload. */ - r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); + r4 = commit(cache, !r1 && !r2 && !r3); if (r4) - DMERR("could not write cache metadata. Data loss may occur."); + DMERR("could not write cache metadata."); return !r1 && !r2 && !r3 && !r4; } @@ -3118,7 +3250,8 @@ static void cache_postsuspend(struct dm_target *ti) requeue_deferred_cells(cache); stop_quiescing(cache); - (void) sync_metadata(cache); + if (get_cache_mode(cache) == CM_WRITE) + (void) sync_metadata(cache); } static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, @@ -3257,6 +3390,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) r = dm_cache_resize(cache->cmd, new_size); if (r) { DMERR("could not resize cache metadata"); + metadata_operation_failed(cache, "dm_cache_resize", r); return r; } @@ -3295,6 +3429,7 @@ static int cache_preresume(struct dm_target *ti) load_mapping, cache); if (r) { DMERR("could not load cache mappings"); + metadata_operation_failed(cache, "dm_cache_load_mappings", r); return r; } @@ -3315,6 +3450,7 @@ static int cache_preresume(struct dm_target *ti) r = dm_cache_load_discards(cache->cmd, load_discard, &li); if (r) { DMERR("could not load origin discards"); + metadata_operation_failed(cache, "dm_cache_load_discards", r); return r; } set_discard_range(&li); @@ -3342,7 +3478,7 @@ static void cache_resume(struct dm_target *ti) * <#demotions> <#promotions> <#dirty> * <#features> * * <#core args> - * <#policy args> * + * <#policy args> * */ static void cache_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -3358,13 +3494,15 @@ static void cache_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - /* Commit to ensure statistics aren't out-of-date */ - if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { - r = dm_cache_commit(cache->cmd, false); - if (r) - DMERR("could not commit metadata for accurate status"); + if (get_cache_mode(cache) == CM_FAIL) { + DMEMIT("Fail"); + break; } + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) + (void) commit(cache, false); + r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata); if (r) { @@ -3413,11 +3551,16 @@ static void cache_status(struct dm_target *ti, status_type_t type, DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); if (sz < maxlen) { - r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); + r = policy_emit_config_values(cache->policy, result, maxlen, &sz); if (r) DMERR("policy_emit_config_values returned %d", r); } + if (get_cache_mode(cache) == CM_READ_ONLY) + DMEMIT("ro "); + else + DMEMIT("rw "); + break; case STATUSTYPE_TABLE: @@ -3573,6 +3716,11 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv) if (!argc) return -EINVAL; + if (get_cache_mode(cache) >= CM_READ_ONLY) { + DMERR("unable to service cache target messages in READ_ONLY or FAIL mode"); + return -EOPNOTSUPP; + } + if (!strcasecmp(argv[0], "invalidate_cblocks")) return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); @@ -3646,7 +3794,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, -- cgit From b61d9509628fea995196a96b4c1713fa67dade88 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 22 Apr 2015 17:25:56 -0400 Subject: dm cache: prefix all DMERR and DMINFO messages with cache device name Having the DM device name associated with the ERR or INFO message is very helpful. Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 102 +++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 38 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index dae0321ebfa9..5aad875b822c 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -968,6 +968,11 @@ static enum cache_metadata_mode get_cache_mode(struct cache *cache) return cache->features.mode; } +static const char *cache_device_name(struct cache *cache) +{ + return dm_device_name(dm_table_get_md(cache->ti->table)); +} + static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode) { const char *descs[] = { @@ -977,7 +982,8 @@ static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mod }; dm_table_event(cache->ti->table); - DMINFO("switching cache to %s mode", descs[(int)mode]); + DMINFO("%s: switching cache to %s mode", + cache_device_name(cache), descs[(int)mode]); } static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode) @@ -986,7 +992,8 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod enum cache_metadata_mode old_mode = get_cache_mode(cache); if (new_mode == CM_WRITE && needs_check) { - DMERR("unable to switch cache to write mode until repaired."); + DMERR("%s: unable to switch cache to write mode until repaired.", + cache_device_name(cache)); if (old_mode != new_mode) new_mode = old_mode; else @@ -1016,24 +1023,27 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod static void abort_transaction(struct cache *cache) { + const char *dev_name = cache_device_name(cache); + if (get_cache_mode(cache) >= CM_READ_ONLY) return; if (dm_cache_metadata_set_needs_check(cache->cmd)) { - DMERR("failed to set 'needs_check' flag in metadata"); + DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); set_cache_mode(cache, CM_FAIL); } - DMERR_LIMIT("aborting current metadata transaction"); + DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); if (dm_cache_metadata_abort(cache->cmd)) { - DMERR("failed to abort metadata transaction"); + DMERR("%s: failed to abort metadata transaction", dev_name); set_cache_mode(cache, CM_FAIL); } } static void metadata_operation_failed(struct cache *cache, const char *op, int r) { - DMERR_LIMIT("metadata operation '%s' failed: error = %d", op, r); + DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d", + cache_device_name(cache), op, r); abort_transaction(cache); set_cache_mode(cache, CM_READ_ONLY); } @@ -1120,21 +1130,22 @@ static void free_io_migration(struct dm_cache_migration *mg) static void migration_failure(struct dm_cache_migration *mg) { struct cache *cache = mg->cache; + const char *dev_name = cache_device_name(cache); if (mg->writeback) { - DMWARN_LIMIT("writeback failed; couldn't copy block"); + DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name); set_dirty(cache, mg->old_oblock, mg->cblock); cell_defer(cache, mg->old_ocell, false); } else if (mg->demote) { - DMWARN_LIMIT("demotion failed; couldn't copy block"); + DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name); policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); cell_defer(cache, mg->old_ocell, mg->promote ? false : true); if (mg->promote) cell_defer(cache, mg->new_ocell, true); } else { - DMWARN_LIMIT("promotion failed; couldn't copy block"); + DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name); policy_remove_mapping(cache->policy, mg->new_oblock); cell_defer(cache, mg->new_ocell, true); } @@ -1157,7 +1168,8 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) } else if (mg->demote) { r = dm_cache_remove_mapping(cache->cmd, mg->cblock); if (r) { - DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); + DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata", + cache_device_name(cache)); metadata_operation_failed(cache, "dm_cache_remove_mapping", r); policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); @@ -1169,7 +1181,8 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) } else { r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock); if (r) { - DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); + DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata", + cache_device_name(cache)); metadata_operation_failed(cache, "dm_cache_insert_mapping", r); policy_remove_mapping(cache->policy, mg->new_oblock); free_io_migration(mg); @@ -1189,7 +1202,8 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) struct cache *cache = mg->cache; if (mg->writeback) { - DMWARN("writeback unexpectedly triggered commit"); + DMWARN_LIMIT("%s: writeback unexpectedly triggered commit", + cache_device_name(cache)); return; } else if (mg->demote) { @@ -1265,7 +1279,7 @@ static void issue_copy(struct dm_cache_migration *mg) } if (r < 0) { - DMERR_LIMIT("issuing migration failed"); + DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache)); migration_failure(mg); } } @@ -1863,7 +1877,8 @@ static void process_cell(struct cache *cache, struct prealloc *structs, break; default: - DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__, + DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u", + cache_device_name(cache), __func__, (unsigned) lookup_result.op); bio_io_error(bio); } @@ -2101,7 +2116,7 @@ static void process_invalidation_request(struct cache *cache, struct invalidatio r = 0; } else { - DMERR("policy_remove_cblock failed"); + DMERR("%s: policy_remove_cblock failed", cache_device_name(cache)); break; } @@ -3054,7 +3069,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } else if (r) { - DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); + DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d", + cache_device_name(cache), r); cell_defer(cache, cell, false); bio_io_error(bio); return DM_MAPIO_SUBMITTED; @@ -3113,7 +3129,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio) break; default: - DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, + DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u", + cache_device_name(cache), __func__, (unsigned) lookup_result.op); cell_defer(cache, cell, false); bio_io_error(bio); @@ -3173,7 +3190,7 @@ static int write_discard_bitset(struct cache *cache) r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, cache->discard_nr_blocks); if (r) { - DMERR("could not resize on-disk discard bitset"); + DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache)); metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r); return r; } @@ -3215,17 +3232,17 @@ static bool sync_metadata(struct cache *cache) r1 = write_dirty_bitset(cache); if (r1) - DMERR("could not write dirty bitset"); + DMERR("%s: could not write dirty bitset", cache_device_name(cache)); r2 = write_discard_bitset(cache); if (r2) - DMERR("could not write discard bitset"); + DMERR("%s: could not write discard bitset", cache_device_name(cache)); save_stats(cache); r3 = write_hints(cache); if (r3) - DMERR("could not write hints"); + DMERR("%s: could not write hints", cache_device_name(cache)); /* * If writing the above metadata failed, we still commit, but don't @@ -3234,7 +3251,7 @@ static bool sync_metadata(struct cache *cache) */ r4 = commit(cache, !r1 && !r2 && !r3); if (r4) - DMERR("could not write cache metadata."); + DMERR("%s: could not write cache metadata", cache_device_name(cache)); return !r1 && !r2 && !r3 && !r4; } @@ -3374,7 +3391,8 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) while (from_cblock(new_size) < from_cblock(cache->cache_size)) { new_size = to_cblock(from_cblock(new_size) + 1); if (is_dirty(cache, new_size)) { - DMERR("unable to shrink cache; cache block %llu is dirty", + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), (unsigned long long) from_cblock(new_size)); return false; } @@ -3389,7 +3407,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) r = dm_cache_resize(cache->cmd, new_size); if (r) { - DMERR("could not resize cache metadata"); + DMERR("%s: could not resize cache metadata", cache_device_name(cache)); metadata_operation_failed(cache, "dm_cache_resize", r); return r; } @@ -3428,7 +3446,7 @@ static int cache_preresume(struct dm_target *ti) r = dm_cache_load_mappings(cache->cmd, cache->policy, load_mapping, cache); if (r) { - DMERR("could not load cache mappings"); + DMERR("%s: could not load cache mappings", cache_device_name(cache)); metadata_operation_failed(cache, "dm_cache_load_mappings", r); return r; } @@ -3449,7 +3467,7 @@ static int cache_preresume(struct dm_target *ti) discard_load_info_init(cache, &li); r = dm_cache_load_discards(cache->cmd, load_discard, &li); if (r) { - DMERR("could not load origin discards"); + DMERR("%s: could not load origin discards", cache_device_name(cache)); metadata_operation_failed(cache, "dm_cache_load_discards", r); return r; } @@ -3503,16 +3521,17 @@ static void cache_status(struct dm_target *ti, status_type_t type, if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) (void) commit(cache, false); - r = dm_cache_get_free_metadata_block_count(cache->cmd, - &nr_free_blocks_metadata); + r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata); if (r) { - DMERR("could not get metadata free block count"); + DMERR("%s: dm_cache_get_free_metadata_block_count returned %d", + cache_device_name(cache), r); goto err; } r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); if (r) { - DMERR("could not get metadata device size"); + DMERR("%s: dm_cache_get_metadata_dev_size returned %d", + cache_device_name(cache), r); goto err; } @@ -3543,7 +3562,8 @@ static void cache_status(struct dm_target *ti, status_type_t type, DMEMIT("1 writeback "); else { - DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); + DMERR("%s: internal error: unknown io mode: %d", + cache_device_name(cache), (int) cache->features.io_mode); goto err; } @@ -3553,7 +3573,8 @@ static void cache_status(struct dm_target *ti, status_type_t type, if (sz < maxlen) { r = policy_emit_config_values(cache->policy, result, maxlen, &sz); if (r) - DMERR("policy_emit_config_values returned %d", r); + DMERR("%s: policy_emit_config_values returned %d", + cache_device_name(cache), r); } if (get_cache_mode(cache) == CM_READ_ONLY) @@ -3622,7 +3643,7 @@ static int parse_cblock_range(struct cache *cache, const char *str, return 0; } - DMERR("invalid cblock range '%s'", str); + DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str); return -EINVAL; } @@ -3633,17 +3654,20 @@ static int validate_cblock_range(struct cache *cache, struct cblock_range *range uint64_t n = from_cblock(cache->cache_size); if (b >= n) { - DMERR("begin cblock out of range: %llu >= %llu", b, n); + DMERR("%s: begin cblock out of range: %llu >= %llu", + cache_device_name(cache), b, n); return -EINVAL; } if (e > n) { - DMERR("end cblock out of range: %llu > %llu", e, n); + DMERR("%s: end cblock out of range: %llu > %llu", + cache_device_name(cache), e, n); return -EINVAL; } if (b >= e) { - DMERR("invalid cblock range: %llu >= %llu", b, e); + DMERR("%s: invalid cblock range: %llu >= %llu", + cache_device_name(cache), b, e); return -EINVAL; } @@ -3677,7 +3701,8 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun struct cblock_range range; if (!passthrough_mode(&cache->features)) { - DMERR("cache has to be in passthrough mode for invalidation"); + DMERR("%s: cache has to be in passthrough mode for invalidation", + cache_device_name(cache)); return -EPERM; } @@ -3717,7 +3742,8 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; if (get_cache_mode(cache) >= CM_READ_ONLY) { - DMERR("unable to service cache target messages in READ_ONLY or FAIL mode"); + DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode", + cache_device_name(cache)); return -EOPNOTSUPP; } -- cgit From fba10109a45d864bab98ae90dd63bcc2789352b3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 29 May 2015 10:20:56 +0100 Subject: dm cache: age and write back cache entries even without active IO The policy tick() method is normally called from interrupt context. Both the mq and smq policies do some bottom half work for the tick method in their map functions. However if no IO is going through the cache, then that bottom half work doesn't occur. With these policies this means recently hit entries do not age and do not get written back as early as we'd like. Fix this by introducing a new 'can_block' parameter to the tick() method. When this is set the bottom half work occurs immediately. 'can_block' is set when the tick method is called every second by the core target (not in interrupt context). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5aad875b822c..1b4e1756b169 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2271,7 +2271,7 @@ static void do_worker(struct work_struct *ws) static void do_waker(struct work_struct *ws) { struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); - policy_tick(cache->policy); + policy_tick(cache->policy, true); wake_worker(cache); queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); } @@ -3148,7 +3148,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); if (pb->tick) { - policy_tick(cache->policy); + policy_tick(cache->policy, false); spin_lock_irqsave(&cache->lock, flags); cache->need_tick_bio = true; -- cgit