Diffstat (limited to 'drivers/md/dm-snap.c')
| -rw-r--r-- | drivers/md/dm-snap.c | 867 |
1 file changed, 621 insertions, 246 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1ba41048b438..f40c18da4000 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * dm-snapshot.c - * * Copyright (C) 2001-2002 Sistina Software (UK) Limited. * * This file is released under the GPL. @@ -13,6 +12,7 @@ #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/list.h> +#include <linux/list_bl.h> #include <linux/mempool.h> #include <linux/module.h> #include <linux/slab.h> @@ -42,8 +42,8 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; struct dm_exception_table { uint32_t hash_mask; - unsigned hash_shift; - struct list_head *table; + unsigned int hash_shift; + struct hlist_bl_head *table; }; struct dm_snapshot { @@ -75,7 +75,9 @@ struct dm_snapshot { atomic_t pending_exceptions_count; - /* Protected by "lock" */ + spinlock_t pe_allocation_lock; + + /* Protected by "pe_allocation_lock" */ sector_t exception_start_sequence; /* Protected by kcopyd single-threaded callback */ @@ -85,9 +87,9 @@ struct dm_snapshot { * A list of pending exceptions that completed out of order. * Protected by kcopyd single-threaded callback. */ - struct list_head out_of_order_list; + struct rb_root out_of_order_tree; - mempool_t *pending_pool; + mempool_t pending_pool; struct dm_exception_table pending; struct dm_exception_table complete; @@ -105,6 +107,9 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; + unsigned int in_progress; + struct wait_queue_head in_progress_wait; + struct dm_kcopyd_client *kcopyd_client; /* Wait for events based on state_bits */ @@ -118,16 +123,19 @@ struct dm_snapshot { * The merge operation failed if this flag is set. * Failure modes are handled as follows: * - I/O error reading the header - * => don't load the target; abort. + * => don't load the target; abort. * - Header does not have "valid" flag set - * => use the origin; forget about the snapshot. + * => use the origin; forget about the snapshot. * - I/O error when reading exceptions - * => don't load the target; abort. + * => don't load the target; abort. * (We can't use the intermediate origin state.) * - I/O error while merging * => stop merging; set merge_failed; process I/O normally. */ - int merge_failed; + bool merge_failed:1; + + bool discard_zeroes_cow:1; + bool discard_passdown_origin:1; /* * Incoming bios that overlap with chunks being merged must wait @@ -145,6 +153,19 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +/* + * Maximum number of chunks being copied on write. + * + * The value was decided experimentally as a trade-off between memory + * consumption, stalling the kernel's workqueues and maintaining a high enough + * throughput. + */ +#define DEFAULT_COW_THRESHOLD 2048 + +static unsigned int cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644); +MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); + DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -200,7 +221,7 @@ struct dm_snap_pending_exception { /* A sequence number, it is used for in-order completion. */ sector_t exception_sequence; - struct list_head out_of_order_entry; + struct rb_node out_of_order_node; /* * For writing a complete chunk, bypassing the copy. 
@@ -224,12 +245,14 @@ struct dm_snap_tracked_chunk { static void init_tracked_chunk(struct bio *bio) { struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + INIT_HLIST_NODE(&c->node); } static bool is_bio_tracked(struct bio *bio) { struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + return !hlist_unhashed(&c->node); } @@ -277,12 +300,12 @@ static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) /* * This conflicting I/O is extremely improbable in the caller, - * so msleep(1) is sufficient and there is no need for a wait queue. + * so fsleep(1000) is sufficient and there is no need for a wait queue. */ static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) { while (__chunk_is_tracked(s, chunk)) - msleep(1); + fsleep(1000); } /* @@ -304,7 +327,7 @@ struct origin { struct dm_origin { struct dm_dev *dev; struct dm_target *ti; - unsigned split_boundary; + unsigned int split_boundary; struct list_head hash_list; }; @@ -326,8 +349,8 @@ static int init_origin_hash(void) { int i; - _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), - GFP_KERNEL); + _origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head), + GFP_KERNEL); if (!_origins) { DMERR("unable to allocate memory for _origins"); return -ENOMEM; @@ -335,8 +358,9 @@ static int init_origin_hash(void) for (i = 0; i < ORIGIN_HASH_SIZE; i++) INIT_LIST_HEAD(_origins + i); - _dm_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), - GFP_KERNEL); + _dm_origins = kmalloc_array(ORIGIN_HASH_SIZE, + sizeof(struct list_head), + GFP_KERNEL); if (!_dm_origins) { DMERR("unable to allocate memory for _dm_origins"); kfree(_origins); @@ -356,7 +380,7 @@ static void exit_origin_hash(void) kfree(_dm_origins); } -static unsigned origin_hash(struct block_device *bdev) +static unsigned int origin_hash(struct block_device *bdev) { return bdev->bd_dev & ORIGIN_MASK; } @@ -367,7 +391,7 @@ static struct origin *__lookup_origin(struct block_device *origin) struct origin *o; ol = &_origins[origin_hash(origin)]; - list_for_each_entry (o, ol, hash_list) + list_for_each_entry(o, ol, hash_list) if (bdev_equal(o->bdev, origin)) return o; @@ -377,6 +401,7 @@ static struct origin *__lookup_origin(struct block_device *origin) static void __insert_origin(struct origin *o) { struct list_head *sl = &_origins[origin_hash(o->bdev)]; + list_add_tail(&o->hash_list, sl); } @@ -386,7 +411,7 @@ static struct dm_origin *__lookup_dm_origin(struct block_device *origin) struct dm_origin *o; ol = &_dm_origins[origin_hash(origin)]; - list_for_each_entry (o, ol, hash_list) + list_for_each_entry(o, ol, hash_list) if (bdev_equal(o->dev->bdev, origin)) return o; @@ -396,6 +421,7 @@ static struct dm_origin *__lookup_dm_origin(struct block_device *origin) static void __insert_dm_origin(struct dm_origin *o) { struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)]; + list_add_tail(&o->hash_list, sl); } @@ -469,8 +495,7 @@ static int __validate_exception_handover(struct dm_snapshot *snap) if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, &snap_merge) == 2) || snap_dest) { - snap->ti->error = "Snapshot cow pairing for exception " - "table handover failed"; + snap->ti->error = "Snapshot cow pairing for exception table handover failed"; return -EINVAL; } @@ -497,8 +522,7 @@ static int __validate_exception_handover(struct dm_snapshot *snap) if (!snap_src->store->type->prepare_merge || !snap_src->store->type->commit_merge) { - 
snap->ti->error = "Snapshot exception store does not " - "support snapshot-merge."; + snap->ti->error = "Snapshot exception store does not support snapshot-merge."; return -EINVAL; } @@ -600,19 +624,50 @@ static void unregister_snapshot(struct dm_snapshot *s) * The lowest hash_shift bits of the chunk number are ignored, allowing * some consecutive chunks to be grouped together. */ +static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk); + +/* Lock to protect access to the completed and pending exception hash tables. */ +struct dm_exception_table_lock { + struct hlist_bl_head *complete_slot; + struct hlist_bl_head *pending_slot; +}; + +static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, + struct dm_exception_table_lock *lock) +{ + struct dm_exception_table *complete = &s->complete; + struct dm_exception_table *pending = &s->pending; + + lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; + lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; +} + +static void dm_exception_table_lock(struct dm_exception_table_lock *lock) +{ + hlist_bl_lock(lock->complete_slot); + hlist_bl_lock(lock->pending_slot); +} + +static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) +{ + hlist_bl_unlock(lock->pending_slot); + hlist_bl_unlock(lock->complete_slot); +} + static int dm_exception_table_init(struct dm_exception_table *et, - uint32_t size, unsigned hash_shift) + uint32_t size, unsigned int hash_shift) { unsigned int i; et->hash_shift = hash_shift; et->hash_mask = size - 1; - et->table = dm_vcalloc(size, sizeof(struct list_head)); + et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head), + GFP_KERNEL); if (!et->table) return -ENOMEM; for (i = 0; i < size; i++) - INIT_LIST_HEAD(et->table + i); + INIT_HLIST_BL_HEAD(et->table + i); return 0; } @@ -620,19 +675,22 @@ static int dm_exception_table_init(struct dm_exception_table *et, static void dm_exception_table_exit(struct dm_exception_table *et, struct kmem_cache *mem) { - struct list_head *slot; - struct dm_exception *ex, *next; + struct hlist_bl_head *slot; + struct dm_exception *ex; + struct hlist_bl_node *pos, *n; int i, size; size = et->hash_mask + 1; for (i = 0; i < size; i++) { slot = et->table + i; - list_for_each_entry_safe (ex, next, slot, hash_list) + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { kmem_cache_free(mem, ex); + cond_resched(); + } } - vfree(et->table); + kvfree(et->table); } static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) @@ -642,7 +700,7 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) static void dm_remove_exception(struct dm_exception *e) { - list_del(&e->hash_list); + hlist_bl_del(&e->hash_list); } /* @@ -652,11 +710,12 @@ static void dm_remove_exception(struct dm_exception *e) static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, chunk_t chunk) { - struct list_head *slot; + struct hlist_bl_head *slot; + struct hlist_bl_node *pos; struct dm_exception *e; slot = &et->table[exception_hash(et, chunk)]; - list_for_each_entry (e, slot, hash_list) + hlist_bl_for_each_entry(e, pos, slot, hash_list) if (chunk >= e->old_chunk && chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) return e; @@ -682,7 +741,7 @@ static void free_completed_exception(struct dm_exception *e) static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) { - struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, + 
struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool, GFP_NOIO); atomic_inc(&s->pending_exceptions_count); @@ -695,7 +754,7 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; - mempool_free(pe, s->pending_pool); + mempool_free(pe, &s->pending_pool); smp_mb__before_atomic(); atomic_dec(&s->pending_exceptions_count); } @@ -703,7 +762,8 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe) static void dm_insert_exception(struct dm_exception_table *eh, struct dm_exception *new_e) { - struct list_head *l; + struct hlist_bl_head *l; + struct hlist_bl_node *pos; struct dm_exception *e = NULL; l = &eh->table[exception_hash(eh, new_e->old_chunk)]; @@ -713,7 +773,7 @@ static void dm_insert_exception(struct dm_exception_table *eh, goto out; /* List is ordered by old_chunk */ - list_for_each_entry_reverse(e, l, hash_list) { + hlist_bl_for_each_entry(e, pos, l, hash_list) { /* Insert after an existing chunk? */ if (new_e->old_chunk == (e->old_chunk + dm_consecutive_chunk_count(e) + 1) && @@ -734,12 +794,24 @@ static void dm_insert_exception(struct dm_exception_table *eh, return; } - if (new_e->old_chunk > e->old_chunk) + if (new_e->old_chunk < e->old_chunk) break; } out: - list_add(&new_e->hash_list, e ? &e->hash_list : l); + if (!e) { + /* + * Either the table doesn't support consecutive chunks or slot + * l is empty. + */ + hlist_bl_add_head(&new_e->hash_list, l); + } else if (new_e->old_chunk < e->old_chunk) { + /* Add before an existing exception */ + hlist_bl_add_before(&new_e->hash_list, &e->hash_list); + } else { + /* Add to l's tail: e is the last exception in this slot */ + hlist_bl_add_behind(&new_e->hash_list, &e->hash_list); + } } /* @@ -748,6 +820,7 @@ out: */ static int dm_add_exception(void *context, chunk_t old, chunk_t new) { + struct dm_exception_table_lock lock; struct dm_snapshot *s = context; struct dm_exception *e; @@ -760,7 +833,17 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) /* Consecutive_count is implicitly initialised to zero */ e->new_chunk = new; + /* + * Although there is no need to lock access to the exception tables + * here, if we don't then hlist_bl_add_head(), called by + * dm_insert_exception(), will complain about accessing the + * corresponding list without locking it first. 
+ */ + dm_exception_table_lock_init(s, old, &lock); + + dm_exception_table_lock(&lock); dm_insert_exception(&s->complete, e); + dm_exception_table_unlock(&lock); return 0; } @@ -772,7 +855,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; - unsigned chunk_size = 0; + unsigned int chunk_size = rounddown_pow_of_two(UINT_MAX); if (o) list_for_each_entry(snap, &o->snapshots, list) @@ -789,7 +872,8 @@ static int calc_max_buckets(void) { /* use a fixed size of 2MB */ unsigned long mem = 2 * 1024 * 1024; - mem /= sizeof(struct list_head); + + mem /= sizeof(struct hlist_bl_head); return mem; } @@ -859,8 +943,7 @@ static int __remove_single_exception_chunk(struct dm_snapshot *s, e = dm_lookup_exception(&s->complete, old_chunk); if (!e) { - DMERR("Corruption detected: exception for block %llu is " - "on disk but not in memory", + DMERR("Corruption detected: exception for block %llu is on disk but not in memory", (unsigned long long)old_chunk); return -EINVAL; } @@ -887,8 +970,7 @@ static int __remove_single_exception_chunk(struct dm_snapshot *s, e->new_chunk++; } else if (old_chunk != e->old_chunk + dm_consecutive_chunk_count(e)) { - DMERR("Attempt to merge block %llu from the " - "middle of a chunk range [%llu - %llu]", + DMERR("Attempt to merge block %llu from the middle of a chunk range [%llu - %llu]", (unsigned long long)old_chunk, (unsigned long long)e->old_chunk, (unsigned long long) @@ -932,7 +1014,7 @@ out: } static int origin_write_extent(struct dm_snapshot *merging_snap, - sector_t sector, unsigned chunk_size); + sector_t sector, unsigned int chunk_size); static void merge_callback(int read_err, unsigned long write_err, void *context); @@ -981,10 +1063,9 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) &new_chunk); if (linear_chunks <= 0) { if (linear_chunks < 0) { - DMERR("Read error in exception store: " - "shutting down merge"); + DMERR("Read error in exception store: shutting down merge"); down_write(&s->lock); - s->merge_failed = 1; + s->merge_failed = true; up_write(&s->lock); } goto shut; @@ -1057,6 +1138,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) goto shut; } + if (blkdev_issue_flush(s->origin->bdev) < 0) { + DMERR("Flush after merge failed: shutting down merge"); + goto shut; + } + if (s->store->type->commit_merge(s->store, s->num_merging_chunks) < 0) { DMERR("Write error in exception store: shutting down merge"); @@ -1072,7 +1158,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) shut: down_write(&s->lock); - s->merge_failed = 1; + s->merge_failed = true; b = __release_queued_bios_after_merge(s); up_write(&s->lock); error_bios(b); @@ -1096,37 +1182,95 @@ static void stop_merge(struct dm_snapshot *s) clear_bit(SHUTDOWN_MERGE, &s->state_bits); } +static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s, + struct dm_target *ti) +{ + int r; + unsigned int argc; + const char *arg_name; + + static const struct dm_arg _args[] = { + {0, 2, "Invalid number of feature arguments"}, + }; + + /* + * No feature arguments supplied. 
+ */ + if (!as->argc) + return 0; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + while (argc && !r) { + arg_name = dm_shift_arg(as); + argc--; + + if (!strcasecmp(arg_name, "discard_zeroes_cow")) + s->discard_zeroes_cow = true; + + else if (!strcasecmp(arg_name, "discard_passdown_origin")) + s->discard_passdown_origin = true; + + else { + ti->error = "Unrecognised feature requested"; + r = -EINVAL; + break; + } + } + + if (!s->discard_zeroes_cow && s->discard_passdown_origin) { + /* + * TODO: really these are disjoint.. but ti->num_discard_bios + * and dm_bio_get_target_bio_nr() require rigid constraints. + */ + ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow"; + r = -EINVAL; + } + + return r; +} + /* - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size> + * Construct a snapshot mapping: + * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*] */ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; + struct dm_arg_set as; int i; int r = -EINVAL; char *origin_path, *cow_path; - dev_t origin_dev, cow_dev; - unsigned args_used, num_flush_bios = 1; - fmode_t origin_mode = FMODE_READ; + unsigned int args_used, num_flush_bios = 1; + blk_mode_t origin_mode = BLK_OPEN_READ; - if (argc != 4) { - ti->error = "requires exactly 4 arguments"; + if (argc < 4) { + ti->error = "requires 4 or more arguments"; r = -EINVAL; goto bad; } if (dm_target_is_snapshot_merge(ti)) { num_flush_bios = 2; - origin_mode = FMODE_WRITE; + origin_mode = BLK_OPEN_WRITE; } - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) { ti->error = "Cannot allocate private snapshot structure"; r = -ENOMEM; goto bad; } + as.argc = argc; + as.argv = argv; + dm_consume_args(&as, 4); + r = parse_snapshot_features(&as, s, ti); + if (r) + goto bad_features; + origin_path = argv[0]; argv++; argc--; @@ -1136,24 +1280,21 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Cannot get origin device"; goto bad_origin; } - origin_dev = s->origin->bdev->bd_dev; cow_path = argv[0]; argv++; argc--; - cow_dev = dm_get_dev_t(cow_path); - if (cow_dev && cow_dev == origin_dev) { - ti->error = "COW device cannot be the same as origin device"; - r = -EINVAL; - goto bad_cow; - } - r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); if (r) { ti->error = "Cannot get COW device"; goto bad_cow; } + if (s->cow->bdev && s->cow->bdev == s->origin->bdev) { + ti->error = "COW device cannot be the same as origin device"; + r = -EINVAL; + goto bad_store; + } r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); if (r) { @@ -1170,14 +1311,15 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->snapshot_overflowed = 0; s->active = 0; atomic_set(&s->pending_exceptions_count, 0); + spin_lock_init(&s->pe_allocation_lock); s->exception_start_sequence = 0; s->exception_complete_sequence = 0; - INIT_LIST_HEAD(&s->out_of_order_list); + s->out_of_order_tree = RB_ROOT; init_rwsem(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); s->state_bits = 0; - s->merge_failed = 0; + s->merge_failed = false; s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); @@ -1189,6 +1331,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } + init_waitqueue_head(&s->in_progress_wait); + 
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); @@ -1196,10 +1340,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_kcopyd; } - s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); - if (!s->pending_pool) { + r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache); + if (r) { ti->error = "Could not allocate mempool for pending exceptions"; - r = -ENOMEM; goto bad_pending_pool; } @@ -1210,6 +1353,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_bios = num_flush_bios; + if (s->discard_zeroes_cow) + ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1); ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1246,6 +1391,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!s->store->chunk_size) { ti->error = "Chunk size not set"; + r = -EINVAL; goto bad_read_metadata; } @@ -1257,29 +1403,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad_read_metadata: unregister_snapshot(s); - bad_load_and_register: - mempool_destroy(s->pending_pool); - + mempool_exit(&s->pending_pool); bad_pending_pool: dm_kcopyd_client_destroy(s->kcopyd_client); - bad_kcopyd: dm_exception_table_exit(&s->pending, pending_cache); dm_exception_table_exit(&s->complete, exception_cache); - bad_hash_tables: dm_exception_store_destroy(s->store); - bad_store: dm_put_device(ti, s->cow); - bad_cow: dm_put_device(ti, s->origin); - bad_origin: +bad_features: kfree(s); - bad: return r; } @@ -1353,9 +1492,9 @@ static void snapshot_dtr(struct dm_target *ti) unregister_snapshot(s); while (atomic_read(&s->pending_exceptions_count)) - msleep(1); + fsleep(1000); /* - * Ensure instructions in mempool_destroy aren't reordered + * Ensure instructions in mempool_exit aren't reordered * before atomic_read. */ smp_mb(); @@ -1367,7 +1506,7 @@ static void snapshot_dtr(struct dm_target *ti) __free_exceptions(s); - mempool_destroy(s->pending_pool); + mempool_exit(&s->pending_pool); dm_exception_store_destroy(s->store); @@ -1375,9 +1514,57 @@ static void snapshot_dtr(struct dm_target *ti) dm_put_device(ti, s->origin); + WARN_ON(s->in_progress); + kfree(s); } +static void account_start_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + s->in_progress++; + spin_unlock(&s->in_progress_wait.lock); +} + +static void account_end_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + BUG_ON(!s->in_progress); + s->in_progress--; + if (likely(s->in_progress <= cow_threshold) && + unlikely(waitqueue_active(&s->in_progress_wait))) + wake_up_locked(&s->in_progress_wait); + spin_unlock(&s->in_progress_wait.lock); +} + +static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins) +{ + if (unlikely(s->in_progress > cow_threshold)) { + spin_lock(&s->in_progress_wait.lock); + if (likely(s->in_progress > cow_threshold)) { + /* + * NOTE: this throttle doesn't account for whether + * the caller is servicing an IO that will trigger a COW + * so excess throttling may result for chunks not required + * to be COW'd. But if cow_threshold was reached, extra + * throttling is unlikely to negatively impact performance. 
+ */ + DECLARE_WAITQUEUE(wait, current); + + __add_wait_queue(&s->in_progress_wait, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&s->in_progress_wait.lock); + if (unlock_origins) + up_read(&_origins_lock); + io_schedule(); + remove_wait_queue(&s->in_progress_wait, &wait); + return false; + } + spin_unlock(&s->in_progress_wait.lock); + } + return true; +} + /* * Flush a list of buffers. */ @@ -1388,12 +1575,12 @@ static void flush_bios(struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - generic_make_request(bio); + submit_bio_noacct(bio); bio = n; } } -static int do_origin(struct dm_dev *origin, struct bio *bio); +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit); /* * Flush a list of buffers. @@ -1406,9 +1593,9 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - r = do_origin(s->origin, bio); + r = do_origin(s->origin, bio, false); if (r == DM_MAPIO_REMAPPED) - generic_make_request(bio); + submit_bio_noacct(bio); bio = n; } } @@ -1446,6 +1633,13 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) dm_table_event(s->ti->table); } +static void invalidate_snapshot(struct dm_snapshot *s, int err) +{ + down_write(&s->lock); + __invalidate_snapshot(s, err); + up_write(&s->lock); +} + static void pending_complete(void *context, int success) { struct dm_snap_pending_exception *pe = context; @@ -1454,43 +1648,63 @@ static void pending_complete(void *context, int success) struct bio *origin_bios = NULL; struct bio *snapshot_bios = NULL; struct bio *full_bio = NULL; + struct dm_exception_table_lock lock; int error = 0; + dm_exception_table_lock_init(s, pe->e.old_chunk, &lock); + if (!success) { /* Read/write error - snapshot is unusable */ - down_write(&s->lock); - __invalidate_snapshot(s, -EIO); + invalidate_snapshot(s, -EIO); error = 1; + + dm_exception_table_lock(&lock); goto out; } e = alloc_completed_exception(GFP_NOIO); if (!e) { - down_write(&s->lock); - __invalidate_snapshot(s, -ENOMEM); + invalidate_snapshot(s, -ENOMEM); error = 1; + + dm_exception_table_lock(&lock); goto out; } *e = pe->e; - down_write(&s->lock); + down_read(&s->lock); + dm_exception_table_lock(&lock); if (!s->valid) { + up_read(&s->lock); free_completed_exception(e); error = 1; + goto out; } - /* Check for conflicting reads */ - __check_for_conflicting_io(s, pe->e.old_chunk); - /* - * Add a proper exception, and remove the - * in-flight exception from the list. + * Add a proper exception. After inserting the completed exception all + * subsequent snapshot reads to this chunk will be redirected to the + * COW device. This ensures that we do not starve. Moreover, as long + * as the pending exception exists, neither origin writes nor snapshot + * merging can overwrite the chunk in origin. 
*/ dm_insert_exception(&s->complete, e); + up_read(&s->lock); + + /* Wait for conflicting reads to drain */ + if (__chunk_is_tracked(s, pe->e.old_chunk)) { + dm_exception_table_unlock(&lock); + __check_for_conflicting_io(s, pe->e.old_chunk); + dm_exception_table_lock(&lock); + } out: + /* Remove the in-flight exception from the list */ dm_remove_exception(&pe->e); + + dm_exception_table_unlock(&lock); + snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); full_bio = pe->full_bio; @@ -1498,8 +1712,6 @@ out: full_bio->bi_end_io = pe->full_bio_end_io; increment_pending_exceptions_done_count(); - up_write(&s->lock); - /* Submit any pending write bios */ if (error) { if (full_bio) @@ -1537,29 +1749,43 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) pe->copy_error = read_err || write_err; if (pe->exception_sequence == s->exception_complete_sequence) { + struct rb_node *next; + s->exception_complete_sequence++; complete_exception(pe); - while (!list_empty(&s->out_of_order_list)) { - pe = list_entry(s->out_of_order_list.next, - struct dm_snap_pending_exception, out_of_order_entry); + next = rb_first(&s->out_of_order_tree); + while (next) { + pe = rb_entry(next, struct dm_snap_pending_exception, + out_of_order_node); if (pe->exception_sequence != s->exception_complete_sequence) break; + next = rb_next(next); s->exception_complete_sequence++; - list_del(&pe->out_of_order_entry); + rb_erase(&pe->out_of_order_node, &s->out_of_order_tree); complete_exception(pe); + cond_resched(); } } else { - struct list_head *lh; + struct rb_node *parent = NULL; + struct rb_node **p = &s->out_of_order_tree.rb_node; struct dm_snap_pending_exception *pe2; - list_for_each_prev(lh, &s->out_of_order_list) { - pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry); - if (pe2->exception_sequence < pe->exception_sequence) - break; + while (*p) { + pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node); + parent = *p; + + BUG_ON(pe->exception_sequence == pe2->exception_sequence); + if (pe->exception_sequence < pe2->exception_sequence) + p = &((*p)->rb_left); + else + p = &((*p)->rb_right); } - list_add(&pe->out_of_order_entry, lh); + + rb_link_node(&pe->out_of_order_node, parent, p); + rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } + account_end_copy(s); } /* @@ -1583,6 +1809,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ + account_start_copy(s); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1602,13 +1829,14 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; + account_start_copy(s); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); bio->bi_end_io = full_bio_end_io; bio->bi_private = callback_data; - generic_make_request(bio); + submit_bio_noacct(bio); } static struct dm_snap_pending_exception * @@ -1623,53 +1851,100 @@ __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) } /* - * Looks to see if this snapshot already has a pending exception - * for this chunk, otherwise it allocates a new one and inserts - * it into the pending table. + * Inserts a pending exception into the pending table. * - * NOTE: a write lock must be held on snap->lock before calling - * this. + * NOTE: a write lock must be held on the chunk's pending exception table slot + * before calling this. 
*/ static struct dm_snap_pending_exception * -__find_pending_exception(struct dm_snapshot *s, - struct dm_snap_pending_exception *pe, chunk_t chunk) +__insert_pending_exception(struct dm_snapshot *s, + struct dm_snap_pending_exception *pe, chunk_t chunk) { - struct dm_snap_pending_exception *pe2; - - pe2 = __lookup_pending_exception(s, chunk); - if (pe2) { - free_pending_exception(pe); - return pe2; - } - pe->e.old_chunk = chunk; bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); pe->started = 0; pe->full_bio = NULL; + spin_lock(&s->pe_allocation_lock); if (s->store->type->prepare_exception(s->store, &pe->e)) { + spin_unlock(&s->pe_allocation_lock); free_pending_exception(pe); return NULL; } pe->exception_sequence = s->exception_start_sequence++; + spin_unlock(&s->pe_allocation_lock); dm_insert_exception(&s->pending, &pe->e); return pe; } +/* + * Looks to see if this snapshot already has a pending exception + * for this chunk, otherwise it allocates a new one and inserts + * it into the pending table. + * + * NOTE: a write lock must be held on the chunk's pending exception table slot + * before calling this. + */ +static struct dm_snap_pending_exception * +__find_pending_exception(struct dm_snapshot *s, + struct dm_snap_pending_exception *pe, chunk_t chunk) +{ + struct dm_snap_pending_exception *pe2; + + pe2 = __lookup_pending_exception(s, chunk); + if (pe2) { + free_pending_exception(pe); + return pe2; + } + + return __insert_pending_exception(s, pe, chunk); +} + static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, struct bio *bio, chunk_t chunk) { - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); bio->bi_iter.bi_sector = chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) + (chunk - e->old_chunk)) + (bio->bi_iter.bi_sector & s->store->chunk_mask); } +static void zero_callback(int read_err, unsigned long write_err, void *context) +{ + struct bio *bio = context; + struct dm_snapshot *s = bio->bi_private; + + account_end_copy(s); + bio->bi_status = write_err ? BLK_STS_IOERR : 0; + bio_endio(bio); +} + +static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, + struct bio *bio, chunk_t chunk) +{ + struct dm_io_region dest; + + dest.bdev = s->cow->bdev; + dest.sector = bio->bi_iter.bi_sector; + dest.count = s->store->chunk_size; + + account_start_copy(s); + WARN_ON_ONCE(bio->bi_private); + bio->bi_private = s; + dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); +} + +static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio) +{ + return bio->bi_iter.bi_size == + (s->store->chunk_size << SECTOR_SHIFT); +} + static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; @@ -1677,24 +1952,30 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) int r = DM_MAPIO_REMAPPED; chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; + struct dm_exception_table_lock lock; init_tracked_chunk(bio); if (bio->bi_opf & REQ_PREFLUSH) { - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED; } chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); + dm_exception_table_lock_init(s, chunk, &lock); /* Full snapshots are not usable */ /* To get here the table must be live so s->active is always set. 
*/ if (!s->valid) return DM_MAPIO_KILL; - /* FIXME: should only take write lock if we need - * to copy an exception */ - down_write(&s->lock); + if (bio_data_dir(bio) == WRITE) { + while (unlikely(!wait_for_in_progress(s, false))) + ; /* wait_for_in_progress() has slept */ + } + + down_read(&s->lock); + dm_exception_table_lock(&lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_data_dir(bio) == WRITE)) { @@ -1702,30 +1983,57 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) goto out_unlock; } + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) { + /* + * passdown discard to origin (without triggering + * snapshot exceptions via do_origin; doing so would + * defeat the goal of freeing space in origin that is + * implied by the "discard_passdown_origin" feature) + */ + bio_set_dev(bio, s->origin->bdev); + track_chunk(s, bio, chunk); + goto out_unlock; + } + /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */ + } + /* If the block is already remapped - use that, else remap it */ e = dm_lookup_exception(&s->complete, chunk); if (e) { remap_exception(s, e, bio, chunk); + if (unlikely(bio_op(bio) == REQ_OP_DISCARD) && + io_overlaps_chunk(s, bio)) { + dm_exception_table_unlock(&lock); + up_read(&s->lock); + zero_exception(s, e, bio, chunk); + r = DM_MAPIO_SUBMITTED; /* discard is not issued */ + goto out; + } + goto out_unlock; + } + + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + /* + * If no exception exists, complete discard immediately + * otherwise it'll trigger copy-out. + */ + bio_endio(bio); + r = DM_MAPIO_SUBMITTED; goto out_unlock; } /* * Write to snapshot - higher level takes care of RW/RO * flags so we should only get this if we are - * writeable. + * writable. 
*/ if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { - up_write(&s->lock); + dm_exception_table_unlock(&lock); pe = alloc_pending_exception(s); - down_write(&s->lock); - - if (!s->valid || s->snapshot_overflowed) { - free_pending_exception(pe); - r = DM_MAPIO_KILL; - goto out_unlock; - } + dm_exception_table_lock(&lock); e = dm_lookup_exception(&s->complete, chunk); if (e) { @@ -1736,13 +2044,22 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) pe = __find_pending_exception(s, pe, chunk); if (!pe) { + dm_exception_table_unlock(&lock); + up_read(&s->lock); + + down_write(&s->lock); + if (s->store->userspace_supports_overflow) { - s->snapshot_overflowed = 1; - DMERR("Snapshot overflowed: Unable to allocate exception."); + if (s->valid && !s->snapshot_overflowed) { + s->snapshot_overflowed = 1; + DMERR("Snapshot overflowed: Unable to allocate exception."); + } } else __invalidate_snapshot(s, -ENOMEM); + up_write(&s->lock); + r = DM_MAPIO_KILL; - goto out_unlock; + goto out; } } @@ -1750,11 +2067,12 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) r = DM_MAPIO_SUBMITTED; - if (!pe->started && - bio->bi_iter.bi_size == - (s->store->chunk_size << SECTOR_SHIFT)) { + if (!pe->started && io_overlaps_chunk(s, bio)) { pe->started = 1; - up_write(&s->lock); + + dm_exception_table_unlock(&lock); + up_read(&s->lock); + start_full_bio(pe, bio); goto out; } @@ -1762,19 +2080,23 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) bio_list_add(&pe->snapshot_bios, bio); if (!pe->started) { - /* this is protected by snap->lock */ + /* this is protected by the exception table lock */ pe->started = 1; - up_write(&s->lock); + + dm_exception_table_unlock(&lock); + up_read(&s->lock); + start_copy(pe); goto out; } } else { - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); track_chunk(s, bio, chunk); } out_unlock: - up_write(&s->lock); + dm_exception_table_unlock(&lock); + up_read(&s->lock); out: return r; } @@ -1802,12 +2124,18 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) if (bio->bi_opf & REQ_PREFLUSH) { if (!dm_bio_get_target_bio_nr(bio)) - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); else - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED; } + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + /* Once merging, discards no longer effect change */ + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); down_write(&s->lock); @@ -1824,7 +2152,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) chunk >= s->first_merging_chunk && chunk < (s->first_merging_chunk + s->num_merging_chunks)) { - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); bio_list_add(&s->bios_queued_during_merge, bio); r = DM_MAPIO_SUBMITTED; goto out_unlock; @@ -1838,11 +2166,11 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) } redirect_to_origin: - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); - return do_origin(s->origin, bio); + return do_origin(s->origin, bio, false); } out_unlock: @@ -1880,12 +2208,10 @@ static int snapshot_preresume(struct dm_target *ti) if (snap_src && snap_dest) { down_read(&snap_src->lock); if (s == snap_src) { - DMERR("Unable to resume snapshot source until " - "handover completes."); + DMERR("Unable to resume snapshot source until 
handover completes."); r = -EINVAL; } else if (!dm_suspended(snap_src->ti)) { - DMERR("Unable to perform snapshot handover until " - "source is suspended."); + DMERR("Unable to perform snapshot handover until source is suspended."); r = -EINVAL; } up_read(&snap_src->lock); @@ -1987,10 +2313,11 @@ static void snapshot_merge_resume(struct dm_target *ti) } static void snapshot_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) + unsigned int status_flags, char *result, unsigned int maxlen) { - unsigned sz = 0; + unsigned int sz = 0; struct dm_snapshot *snap = ti->private; + unsigned int num_features; switch (type) { case STATUSTYPE_INFO: @@ -2015,8 +2342,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, (unsigned long long)sectors_allocated, (unsigned long long)total_sectors, (unsigned long long)metadata_sectors); - } - else + } else DMEMIT("Unknown"); } @@ -2031,8 +2357,26 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, * make sense. */ DMEMIT("%s %s", snap->origin->name, snap->cow->name); - snap->store->type->status(snap->store, type, result + sz, - maxlen - sz); + sz += snap->store->type->status(snap->store, type, result + sz, + maxlen - sz); + num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin; + if (num_features) { + DMEMIT(" %u", num_features); + if (snap->discard_zeroes_cow) + DMEMIT(" discard_zeroes_cow"); + if (snap->discard_passdown_origin) + DMEMIT(" discard_passdown_origin"); + } + break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",snap_origin_name=%s", snap->origin->name); + DMEMIT(",snap_cow_name=%s", snap->cow->name); + DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n'); + DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 'y' : 'n'); + DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n'); + DMEMIT(";"); break; } } @@ -2051,12 +2395,33 @@ static int snapshot_iterate_devices(struct dm_target *ti, return r; } +static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct dm_snapshot *snap = ti->private; + + if (snap->discard_zeroes_cow) { + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; -/*----------------------------------------------------------------- - * Origin methods - *---------------------------------------------------------------*/ + down_read(&_origins_lock); + + (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL); + if (snap_src && snap_dest) + snap = snap_src; + + /* All discards are split on chunk_size boundary */ + limits->discard_granularity = snap->store->chunk_size; + limits->max_hw_discard_sectors = snap->store->chunk_size; + + up_read(&_origins_lock); + } +} /* + *--------------------------------------------------------------- + * Origin methods + *--------------------------------------------------------------- + */ +/* * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any * supplied bio was ignored. The caller may submit it immediately. 
* (No remapping actually occurs as the origin is always a direct linear @@ -2072,13 +2437,14 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, int r = DM_MAPIO_REMAPPED; struct dm_snapshot *snap; struct dm_exception *e; - struct dm_snap_pending_exception *pe; + struct dm_snap_pending_exception *pe, *pe2; struct dm_snap_pending_exception *pe_to_start_now = NULL; struct dm_snap_pending_exception *pe_to_start_last = NULL; + struct dm_exception_table_lock lock; chunk_t chunk; /* Do all the snapshots on this origin */ - list_for_each_entry (snap, snapshots, list) { + list_for_each_entry(snap, snapshots, list) { /* * Don't make new exceptions in a merging snapshot * because it has effectively been deleted @@ -2086,52 +2452,59 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, if (dm_target_is_snapshot_merge(snap->ti)) continue; - down_write(&snap->lock); - - /* Only deal with valid and active snapshots */ - if (!snap->valid || !snap->active) - goto next_snapshot; - /* Nothing to do if writing beyond end of snapshot */ if (sector >= dm_table_get_size(snap->ti->table)) - goto next_snapshot; + continue; /* * Remember, different snapshots can have * different chunk sizes. */ chunk = sector_to_chunk(snap->store, sector); + dm_exception_table_lock_init(snap, chunk, &lock); - /* - * Check exception table to see if block - * is already remapped in this snapshot - * and trigger an exception if not. - */ - e = dm_lookup_exception(&snap->complete, chunk); - if (e) + down_read(&snap->lock); + dm_exception_table_lock(&lock); + + /* Only deal with valid and active snapshots */ + if (!snap->valid || !snap->active) goto next_snapshot; pe = __lookup_pending_exception(snap, chunk); if (!pe) { - up_write(&snap->lock); - pe = alloc_pending_exception(snap); - down_write(&snap->lock); - - if (!snap->valid) { - free_pending_exception(pe); - goto next_snapshot; - } - + /* + * Check exception table to see if block is already + * remapped in this snapshot and trigger an exception + * if not. + */ e = dm_lookup_exception(&snap->complete, chunk); - if (e) { - free_pending_exception(pe); + if (e) goto next_snapshot; - } - pe = __find_pending_exception(snap, pe, chunk); - if (!pe) { - __invalidate_snapshot(snap, -ENOMEM); - goto next_snapshot; + dm_exception_table_unlock(&lock); + pe = alloc_pending_exception(snap); + dm_exception_table_lock(&lock); + + pe2 = __lookup_pending_exception(snap, chunk); + + if (!pe2) { + e = dm_lookup_exception(&snap->complete, chunk); + if (e) { + free_pending_exception(pe); + goto next_snapshot; + } + + pe = __insert_pending_exception(snap, pe, chunk); + if (!pe) { + dm_exception_table_unlock(&lock); + up_read(&snap->lock); + + invalidate_snapshot(snap, -ENOMEM); + continue; + } + } else { + free_pending_exception(pe); + pe = pe2; } } @@ -2158,7 +2531,8 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, } next_snapshot: - up_write(&snap->lock); + dm_exception_table_unlock(&lock); + up_read(&snap->lock); if (pe_to_start_now) { start_copy(pe_to_start_now); @@ -2179,15 +2553,25 @@ next_snapshot: /* * Called on a write from the origin driver. 
*/ -static int do_origin(struct dm_dev *origin, struct bio *bio) +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit) { struct origin *o; int r = DM_MAPIO_REMAPPED; +again: down_read(&_origins_lock); o = __lookup_origin(origin->bdev); - if (o) + if (o) { + if (limit) { + struct dm_snapshot *s; + + list_for_each_entry(s, &o->snapshots, list) + if (unlikely(!wait_for_in_progress(s, true))) + goto again; + } + r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); + } up_read(&_origins_lock); return r; @@ -2207,7 +2591,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio) * size must be a multiple of merging_snap's chunk_size. */ static int origin_write_extent(struct dm_snapshot *merging_snap, - sector_t sector, unsigned size) + sector_t sector, unsigned int size) { int must_wait = 0; sector_t n; @@ -2283,9 +2667,9 @@ static void origin_dtr(struct dm_target *ti) static int origin_map(struct dm_target *ti, struct bio *bio) { struct dm_origin *o = ti->private; - unsigned available_sectors; + unsigned int available_sectors; - bio->bi_bdev = o->dev->bdev; + bio_set_dev(bio, o->dev->bdev); if (unlikely(bio->bi_opf & REQ_PREFLUSH)) return DM_MAPIO_REMAPPED; @@ -2294,20 +2678,13 @@ static int origin_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; available_sectors = o->split_boundary - - ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1)); + ((unsigned int)bio->bi_iter.bi_sector & (o->split_boundary - 1)); if (bio_sectors(bio) > available_sectors) dm_accept_partial_bio(bio, available_sectors); /* Only tell snapshots if this is a write */ - return do_origin(o->dev, bio); -} - -static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) -{ - DMWARN("device does not support dax."); - return -EIO; + return do_origin(o->dev, bio, true); } /* @@ -2335,7 +2712,7 @@ static void origin_postsuspend(struct dm_target *ti) } static void origin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) + unsigned int status_flags, char *result, unsigned int maxlen) { struct dm_origin *o = ti->private; @@ -2347,6 +2724,9 @@ static void origin_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: snprintf(result, maxlen, "%s", o->dev->name); break; + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } @@ -2369,12 +2749,11 @@ static struct target_type origin_target = { .postsuspend = origin_postsuspend, .status = origin_status, .iterate_devices = origin_iterate_devices, - .direct_access = origin_dax_direct_access, }; static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 15, 0}, + .version = {1, 16, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2384,11 +2763,12 @@ static struct target_type snapshot_target = { .resume = snapshot_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, + .io_hints = snapshot_io_hints, }; static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2399,6 +2779,7 @@ static struct target_type merge_target = { .resume = snapshot_merge_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, + .io_hints = snapshot_io_hints, }; static int __init dm_snapshot_init(void) @@ -2411,24 +2792,6 @@ static int __init dm_snapshot_init(void) return r; } - r = 
dm_register_target(&snapshot_target); - if (r < 0) { - DMERR("snapshot target register failed %d", r); - goto bad_register_snapshot_target; - } - - r = dm_register_target(&origin_target); - if (r < 0) { - DMERR("Origin target register failed %d", r); - goto bad_register_origin_target; - } - - r = dm_register_target(&merge_target); - if (r < 0) { - DMERR("Merge target register failed %d", r); - goto bad_register_merge_target; - } - r = init_origin_hash(); if (r) { DMERR("init_origin_hash failed."); @@ -2449,19 +2812,31 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } + r = dm_register_target(&snapshot_target); + if (r < 0) + goto bad_register_snapshot_target; + + r = dm_register_target(&origin_target); + if (r < 0) + goto bad_register_origin_target; + + r = dm_register_target(&merge_target); + if (r < 0) + goto bad_register_merge_target; + return 0; -bad_pending_cache: - kmem_cache_destroy(exception_cache); -bad_exception_cache: - exit_origin_hash(); -bad_origin_hash: - dm_unregister_target(&merge_target); bad_register_merge_target: dm_unregister_target(&origin_target); bad_register_origin_target: dm_unregister_target(&snapshot_target); bad_register_snapshot_target: + kmem_cache_destroy(pending_cache); +bad_pending_cache: + kmem_cache_destroy(exception_cache); +bad_exception_cache: + exit_origin_hash(); +bad_origin_hash: dm_exception_store_exit(); return r; |
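
Because the hunks above are rendered on single lines, here is a condensed copy of the exception-table locking helpers this patch introduces, with a few explanatory comments added (the code is taken from the hunks above; struct definitions and callers are omitted). The pending and complete hash tables are now built from hlist_bl_head slots, so access to a given chunk can be serialised by bit-locking the two slots it hashes to instead of taking the whole-snapshot lock for writing:

	/* Both hash-table slots that a given chunk maps to. */
	struct dm_exception_table_lock {
		struct hlist_bl_head *complete_slot;
		struct hlist_bl_head *pending_slot;
	};

	static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
						 struct dm_exception_table_lock *lock)
	{
		struct dm_exception_table *complete = &s->complete;
		struct dm_exception_table *pending = &s->pending;

		lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
		lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
	}

	/* Always taken in the same order: complete slot first, then pending slot. */
	static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
	{
		hlist_bl_lock(lock->complete_slot);
		hlist_bl_lock(lock->pending_slot);
	}

	/* Released in the reverse order they were taken. */
	static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
	{
		hlist_bl_unlock(lock->pending_slot);
		hlist_bl_unlock(lock->complete_slot);
	}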
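The copy-on-write throttle works the same way on the snapshot and origin write paths: each chunk handed to kcopyd bumps s->in_progress under the wait queue's own spinlock, and writers that find more than snapshot_cow_threshold chunks (default 2048) in flight sleep until a completion drops the count back under the threshold. Condensed from the hunks above, with comments added; the callers in snapshot_map() and do_origin() are omitted:

	#define DEFAULT_COW_THRESHOLD 2048

	static unsigned int cow_threshold = DEFAULT_COW_THRESHOLD;
	module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);

	/* Called before handing a chunk to kcopyd (start_copy, start_full_bio, zero_exception). */
	static void account_start_copy(struct dm_snapshot *s)
	{
		spin_lock(&s->in_progress_wait.lock);
		s->in_progress++;
		spin_unlock(&s->in_progress_wait.lock);
	}

	/* Called from the kcopyd completion callbacks. */
	static void account_end_copy(struct dm_snapshot *s)
	{
		spin_lock(&s->in_progress_wait.lock);
		BUG_ON(!s->in_progress);
		s->in_progress--;
		if (likely(s->in_progress <= cow_threshold) &&
		    unlikely(waitqueue_active(&s->in_progress_wait)))
			wake_up_locked(&s->in_progress_wait);
		spin_unlock(&s->in_progress_wait.lock);
	}

	/*
	 * Returns false if the caller slept (and, for origin writes, dropped
	 * _origins_lock); the caller must then retry.
	 */
	static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
	{
		if (unlikely(s->in_progress > cow_threshold)) {
			spin_lock(&s->in_progress_wait.lock);
			if (likely(s->in_progress > cow_threshold)) {
				DECLARE_WAITQUEUE(wait, current);

				__add_wait_queue(&s->in_progress_wait, &wait);
				__set_current_state(TASK_UNINTERRUPTIBLE);
				spin_unlock(&s->in_progress_wait.lock);
				if (unlock_origins)
					up_read(&_origins_lock);
				io_schedule();
				remove_wait_queue(&s->in_progress_wait, &wait);
				return false;
			}
			spin_unlock(&s->in_progress_wait.lock);
		}
		return true;
	}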
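Out-of-order kcopyd completions, previously kept on a list that was walked backwards, are now parked in an rbtree keyed by exception_sequence; the completion path drains the tree from rb_first() as soon as the expected sequence number arrives. The relevant fragment of copy_callback(), condensed from the hunks above:

	if (pe->exception_sequence == s->exception_complete_sequence) {
		struct rb_node *next;

		s->exception_complete_sequence++;
		complete_exception(pe);

		/* Drain any queued completions that are now in sequence. */
		next = rb_first(&s->out_of_order_tree);
		while (next) {
			pe = rb_entry(next, struct dm_snap_pending_exception,
				      out_of_order_node);
			if (pe->exception_sequence != s->exception_complete_sequence)
				break;
			next = rb_next(next);
			s->exception_complete_sequence++;
			rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
			complete_exception(pe);
			cond_resched();
		}
	} else {
		/* Completed early: park it in the tree, ordered by sequence number. */
		struct rb_node *parent = NULL;
		struct rb_node **p = &s->out_of_order_tree.rb_node;
		struct dm_snap_pending_exception *pe2;

		while (*p) {
			pe2 = rb_entry(*p, struct dm_snap_pending_exception,
				       out_of_order_node);
			parent = *p;

			BUG_ON(pe->exception_sequence == pe2->exception_sequence);
			if (pe->exception_sequence < pe2->exception_sequence)
				p = &((*p)->rb_left);
			else
				p = &((*p)->rb_right);
		}

		rb_link_node(&pe->out_of_order_node, parent, p);
		rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
	}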
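With the new discard_zeroes_cow feature, a chunk-aligned discard that hits an already-remapped chunk is turned into a zeroing job on the COW device via dm_kcopyd_zero() rather than being passed through, and it participates in the same in-progress accounting as ordinary copies; once merging has started, discards are simply completed without effect. Condensed from the hunks above (the snapshot_map() dispatch logic is omitted):

	static void zero_callback(int read_err, unsigned long write_err, void *context)
	{
		struct bio *bio = context;
		struct dm_snapshot *s = bio->bi_private;

		account_end_copy(s);
		bio->bi_status = write_err ? BLK_STS_IOERR : 0;
		bio_endio(bio);
	}

	/* Zero the COW chunk backing an already-remapped chunk hit by a discard. */
	static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
				   struct bio *bio, chunk_t chunk)
	{
		struct dm_io_region dest;

		dest.bdev = s->cow->bdev;
		dest.sector = bio->bi_iter.bi_sector;
		dest.count = s->store->chunk_size;

		account_start_copy(s);
		WARN_ON_ONCE(bio->bi_private);
		bio->bi_private = s;
		dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
	}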
