From be500ed721a6ec8d49bf0814c277ce7162acee0e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 13 Apr 2021 11:03:45 +0100 Subject: dm space maps: improve performance with inc/dec on ranges of blocks When we break sharing on btree nodes we typically need to increment the reference counts to every value held in the node. This can cause a lot of repeated calls to the space maps. Fix this by changing the interface to the space map inc/dec methods to take ranges of adjacent blocks to be operated on. For installations that are using a lot of snapshots this will reduce cpu overhead of fundamental operations such as provisioning a new block, or deleting a snapshot, by as much as 10 times. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-era-target.c | 24 +- drivers/md/dm-thin-metadata.c | 91 +++-- drivers/md/persistent-data/dm-array.c | 52 +-- drivers/md/persistent-data/dm-btree-internal.h | 13 + drivers/md/persistent-data/dm-btree-remove.c | 4 +- drivers/md/persistent-data/dm-btree-spine.c | 16 +- drivers/md/persistent-data/dm-btree.c | 91 ++++- drivers/md/persistent-data/dm-btree.h | 10 +- drivers/md/persistent-data/dm-space-map-common.c | 448 +++++++++++++++++++-- drivers/md/persistent-data/dm-space-map-common.h | 18 +- drivers/md/persistent-data/dm-space-map-disk.c | 74 +--- drivers/md/persistent-data/dm-space-map-metadata.c | 96 ++--- drivers/md/persistent-data/dm-space-map.h | 18 +- .../md/persistent-data/dm-transaction-manager.c | 52 +++ .../md/persistent-data/dm-transaction-manager.h | 12 +- 15 files changed, 774 insertions(+), 245 deletions(-) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index d9ac7372108c..3b748393fca5 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -363,28 +363,32 @@ static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata core->root = le64_to_cpu(disk->root); } -static void ws_inc(void *context, const void *value) +static void ws_inc(void *context, const void *value, unsigned count) { struct era_metadata *md = context; struct writeset_disk ws_d; dm_block_t b; + unsigned i; - memcpy(&ws_d, value, sizeof(ws_d)); - b = le64_to_cpu(ws_d.root); - - dm_tm_inc(md->tm, b); + for (i = 0; i < count; i++) { + memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d)); + b = le64_to_cpu(ws_d.root); + dm_tm_inc(md->tm, b); + } } -static void ws_dec(void *context, const void *value) +static void ws_dec(void *context, const void *value, unsigned count) { struct era_metadata *md = context; struct writeset_disk ws_d; dm_block_t b; + unsigned i; - memcpy(&ws_d, value, sizeof(ws_d)); - b = le64_to_cpu(ws_d.root); - - dm_bitset_del(&md->bitset_info, b); + for (i = 0; i < count; i++) { + memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d)); + b = le64_to_cpu(ws_d.root); + dm_bitset_del(&md->bitset_info, b); + } } static int ws_eq(void *context, const void *value1, const void *value2) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index e75b20480e46..c88ed14d49e6 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -311,28 +311,53 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) *t = v & ((1 << 24) - 1); } -static void data_block_inc(void *context, const void *value_le) +/* + * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as + * possible. 'with_runs' reads contiguous runs of blocks, and calls the + * given sm function. + */ +typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t); + +static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn) { - struct dm_space_map *sm = context; - __le64 v_le; - uint64_t b; + uint64_t b, begin, end; uint32_t t; + bool in_run = false; + unsigned i; - memcpy(&v_le, value_le, sizeof(v_le)); - unpack_block_time(le64_to_cpu(v_le), &b, &t); - dm_sm_inc_block(sm, b); + for (i = 0; i < count; i++, value_le++) { + /* We know value_le is 8 byte aligned */ + unpack_block_time(le64_to_cpu(*value_le), &b, &t); + + if (in_run) { + if (b == end) { + end++; + } else { + fn(sm, begin, end); + begin = b; + end = b + 1; + } + } else { + in_run = true; + begin = b; + end = b + 1; + } + } + + if (in_run) + fn(sm, begin, end); } -static void data_block_dec(void *context, const void *value_le) +static void data_block_inc(void *context, const void *value_le, unsigned count) { - struct dm_space_map *sm = context; - __le64 v_le; - uint64_t b; - uint32_t t; + with_runs((struct dm_space_map *) context, + (const __le64 *) value_le, count, dm_sm_inc_blocks); +} - memcpy(&v_le, value_le, sizeof(v_le)); - unpack_block_time(le64_to_cpu(v_le), &b, &t); - dm_sm_dec_block(sm, b); +static void data_block_dec(void *context, const void *value_le, unsigned count) +{ + with_runs((struct dm_space_map *) context, + (const __le64 *) value_le, count, dm_sm_dec_blocks); } static int data_block_equal(void *context, const void *value1_le, const void *value2_le) @@ -349,27 +374,25 @@ static int data_block_equal(void *context, const void *value1_le, const void *va return b1 == b2; } -static void subtree_inc(void *context, const void *value) +static void subtree_inc(void *context, const void *value, unsigned count) { struct dm_btree_info *info = context; - __le64 root_le; - uint64_t root; + const __le64 *root_le = value; + unsigned i; - memcpy(&root_le, value, sizeof(root_le)); - root = le64_to_cpu(root_le); - dm_tm_inc(info->tm, root); + for (i = 0; i < count; i++, root_le++) + dm_tm_inc(info->tm, le64_to_cpu(*root_le)); } -static void subtree_dec(void *context, const void *value) +static void subtree_dec(void *context, const void *value, unsigned count) { struct dm_btree_info *info = context; - __le64 root_le; - uint64_t root; + const __le64 *root_le = value; + unsigned i; - memcpy(&root_le, value, sizeof(root_le)); - root = le64_to_cpu(root_le); - if (dm_btree_del(info, root)) - DMERR("btree delete failed"); + for (i = 0; i < count; i++, root_le++) + if (dm_btree_del(info, le64_to_cpu(*root_le))) + DMERR("btree delete failed"); } static int subtree_equal(void *context, const void *value1_le, const void *value2_le) @@ -1761,11 +1784,7 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_ int r = 0; pmd_write_lock(pmd); - for (; b != e; b++) { - r = dm_sm_inc_block(pmd->data_sm, b); - if (r) - break; - } + r = dm_sm_inc_blocks(pmd->data_sm, b, e); pmd_write_unlock(pmd); return r; @@ -1776,11 +1795,7 @@ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_ int r = 0; pmd_write_lock(pmd); - for (; b != e; b++) { - r = dm_sm_dec_block(pmd->data_sm, b); - if (r) - break; - } + r = dm_sm_dec_blocks(pmd->data_sm, b, e); pmd_write_unlock(pmd); return r; diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 185dc60360b5..3a963d783a86 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -108,12 +108,10 @@ static void *element_at(struct dm_array_info *info, struct array_block *ab, * in an array block. */ static void on_entries(struct dm_array_info *info, struct array_block *ab, - void (*fn)(void *, const void *)) + void (*fn)(void *, const void *, unsigned)) { - unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); - - for (i = 0; i < nr_entries; i++) - fn(info->value_type.context, element_at(info, ab, i)); + unsigned nr_entries = le32_to_cpu(ab->nr_entries); + fn(info->value_type.context, element_at(info, ab, 0), nr_entries); } /* @@ -175,19 +173,18 @@ static int alloc_ablock(struct dm_array_info *info, size_t size_of_block, static void fill_ablock(struct dm_array_info *info, struct array_block *ab, const void *value, unsigned new_nr) { - unsigned i; - uint32_t nr_entries; + uint32_t nr_entries, delta, i; struct dm_btree_value_type *vt = &info->value_type; BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); nr_entries = le32_to_cpu(ab->nr_entries); - for (i = nr_entries; i < new_nr; i++) { - if (vt->inc) - vt->inc(vt->context, value); + delta = new_nr - nr_entries; + if (vt->inc) + vt->inc(vt->context, value, delta); + for (i = nr_entries; i < new_nr; i++) memcpy(element_at(info, ab, i), value, vt->size); - } ab->nr_entries = cpu_to_le32(new_nr); } @@ -199,17 +196,16 @@ static void fill_ablock(struct dm_array_info *info, struct array_block *ab, static void trim_ablock(struct dm_array_info *info, struct array_block *ab, unsigned new_nr) { - unsigned i; - uint32_t nr_entries; + uint32_t nr_entries, delta; struct dm_btree_value_type *vt = &info->value_type; BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); nr_entries = le32_to_cpu(ab->nr_entries); - for (i = nr_entries; i > new_nr; i--) - if (vt->dec) - vt->dec(vt->context, element_at(info, ab, i - 1)); + delta = nr_entries - new_nr; + if (vt->dec) + vt->dec(vt->context, element_at(info, ab, new_nr - 1), delta); ab->nr_entries = cpu_to_le32(new_nr); } @@ -573,16 +569,17 @@ static int grow(struct resize *resize) * These are the value_type functions for the btree elements, which point * to array blocks. */ -static void block_inc(void *context, const void *value) +static void block_inc(void *context, const void *value, unsigned count) { - __le64 block_le; + const __le64 *block_le = value; struct dm_array_info *info = context; + unsigned i; - memcpy(&block_le, value, sizeof(block_le)); - dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); + for (i = 0; i < count; i++, block_le++) + dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le)); } -static void block_dec(void *context, const void *value) +static void __block_dec(void *context, const void *value) { int r; uint64_t b; @@ -621,6 +618,13 @@ static void block_dec(void *context, const void *value) dm_tm_dec(info->btree_info.tm, b); } +static void block_dec(void *context, const void *value, unsigned count) +{ + unsigned i; + for (i = 0; i < count; i++, value += sizeof(__le64)) + __block_dec(context, value); +} + static int block_equal(void *context, const void *value1, const void *value2) { return !memcmp(value1, value2, sizeof(__le64)); @@ -711,7 +715,7 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_ return r; if (vt->inc) - vt->inc(vt->context, element_at(info, ab, i)); + vt->inc(vt->context, element_at(info, ab, i), 1); } ab->nr_entries = cpu_to_le32(new_nr); @@ -822,9 +826,9 @@ static int array_set_value(struct dm_array_info *info, dm_block_t root, old_value = element_at(info, ab, entry); if (vt->dec && (!vt->equal || !vt->equal(vt->context, old_value, value))) { - vt->dec(vt->context, old_value); + vt->dec(vt->context, old_value, 1); if (vt->inc) - vt->inc(vt->context, value); + vt->inc(vt->context, value, 1); } memcpy(old_value, value, info->value_type.size); diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index b1788853a355..893edb426dba 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -144,4 +144,17 @@ extern struct dm_block_validator btree_node_validator; extern void init_le64_type(struct dm_transaction_manager *tm, struct dm_btree_value_type *vt); +/* + * This returns a shadowed btree leaf that you may modify. In practise + * this means overwrites only, since an insert could cause a node to + * be split. Useful if you need access to the old value to calculate the + * new one. + * + * This only works with single level btrees. The given key must be present in + * the tree, otherwise -EINVAL will be returned. + */ +int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, + uint64_t key, int *index, + dm_block_t *new_root, struct dm_block **leaf); + #endif /* DM_BTREE_INTERNAL_H */ diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index eff04fa23dfa..b34af195bf2a 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -544,7 +544,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, if (info->value_type.dec) info->value_type.dec(info->value_type.context, - value_ptr(n, index)); + value_ptr(n, index), 1); delete_at(n, index); } @@ -653,7 +653,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root, if (k >= keys[last_level] && k < end_key) { if (info->value_type.dec) info->value_type.dec(info->value_type.context, - value_ptr(n, index)); + value_ptr(n, index), 1); delete_at(n, index); keys[last_level] = k + 1ull; diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 2061ab865567..f5bd76ed8fe6 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -236,22 +236,14 @@ dm_block_t shadow_root(struct shadow_spine *s) return s->root; } -static void le64_inc(void *context, const void *value_le) +static void le64_inc(void *context, const void *value_le, unsigned count) { - struct dm_transaction_manager *tm = context; - __le64 v_le; - - memcpy(&v_le, value_le, sizeof(v_le)); - dm_tm_inc(tm, le64_to_cpu(v_le)); + dm_tm_with_runs(context, value_le, count, dm_tm_inc_range); } -static void le64_dec(void *context, const void *value_le) +static void le64_dec(void *context, const void *value_le, unsigned count) { - struct dm_transaction_manager *tm = context; - __le64 v_le; - - memcpy(&v_le, value_le, sizeof(v_le)); - dm_tm_dec(tm, le64_to_cpu(v_le)); + dm_tm_with_runs(context, value_le, count, dm_tm_dec_range); } static int le64_equal(void *context, const void *value1_le, const void *value2_le) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 18282932bedc..0703ca7a7d9a 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -71,15 +71,13 @@ static int upper_bound(struct btree_node *n, uint64_t key) void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { - unsigned i; uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) - for (i = 0; i < nr_entries; i++) - dm_tm_inc(tm, value64(n, i)); + dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range); + else if (vt->inc) - for (i = 0; i < nr_entries; i++) - vt->inc(vt->context, value_ptr(n, i)); + vt->inc(vt->context, value_ptr(n, 0), nr_entries); } static int insert_at(size_t value_size, struct btree_node *node, unsigned index, @@ -318,13 +316,9 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) goto out; } else { - if (info->value_type.dec) { - unsigned i; - - for (i = 0; i < f->nr_children; i++) - info->value_type.dec(info->value_type.context, - value_ptr(f->n, i)); - } + if (info->value_type.dec) + info->value_type.dec(info->value_type.context, + value_ptr(f->n, 0), f->nr_children); pop_frame(s); } } @@ -1146,6 +1140,77 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, return 0; } +static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root, + uint64_t key, int *index) +{ + int r, i = -1; + struct btree_node *node; + + *index = 0; + for (;;) { + r = shadow_step(s, root, &s->info->value_type); + if (r < 0) + return r; + + node = dm_block_data(shadow_current(s)); + + /* + * We have to patch up the parent node, ugly, but I don't + * see a way to do this automatically as part of the spine + * op. + */ + if (shadow_has_parent(s) && i >= 0) { + __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); + + __dm_bless_for_disk(&location); + memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i), + &location, sizeof(__le64)); + } + + node = dm_block_data(shadow_current(s)); + i = lower_bound(node, key); + + BUG_ON(i < 0); + BUG_ON(i >= le32_to_cpu(node->header.nr_entries)); + + if (le32_to_cpu(node->header.flags) & LEAF_NODE) { + if (key != le64_to_cpu(node->keys[i])) + return -EINVAL; + break; + } + + root = value64(node, i); + } + + *index = i; + return 0; +} + +int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root, + uint64_t key, int *index, + dm_block_t *new_root, struct dm_block **leaf) +{ + int r; + struct shadow_spine spine; + + BUG_ON(info->levels > 1); + init_shadow_spine(&spine, info); + r = __btree_get_overwrite_leaf(&spine, root, key, index); + if (!r) { + *new_root = shadow_root(&spine); + *leaf = shadow_current(&spine); + + /* + * Decrement the count so exit_shadow_spine() doesn't + * unlock the leaf. + */ + spine.count--; + } + exit_shadow_spine(&spine); + + return r; +} + static bool need_insert(struct btree_node *node, uint64_t *keys, unsigned level, unsigned index) { @@ -1222,7 +1287,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, value_ptr(n, index), value))) { info->value_type.dec(info->value_type.context, - value_ptr(n, index)); + value_ptr(n, index), 1); } memcpy_disk(value_ptr(n, index), value, info->value_type.size); diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index 3dc5bb1a4748..d2ae5aa4d00b 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -51,21 +51,21 @@ struct dm_btree_value_type { */ /* - * The btree is making a duplicate of the value, for instance + * The btree is making a duplicate of a run of values, for instance * because previously-shared btree nodes have now diverged. * @value argument is the new copy that the copy function may modify. * (Probably it just wants to increment a reference count * somewhere.) This method is _not_ called for insertion of a new * value: It is assumed the ref count is already 1. */ - void (*inc)(void *context, const void *value); + void (*inc)(void *context, const void *value, unsigned count); /* - * This value is being deleted. The btree takes care of freeing + * These values are being deleted. The btree takes care of freeing * the memory pointed to by @value. Often the del function just - * needs to decrement a reference count somewhere. + * needs to decrement a reference counts somewhere. */ - void (*dec)(void *context, const void *value); + void (*dec)(void *context, const void *value, unsigned count); /* * A test for equality between two values. When a value is diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index a213bf11738f..5552941912af 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -6,6 +6,7 @@ #include "dm-space-map-common.h" #include "dm-transaction-manager.h" +#include "dm-btree-internal.h" #include #include @@ -409,12 +410,13 @@ int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, return r; } -static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, - int (*mutator)(void *context, uint32_t old, uint32_t *new), - void *context, enum allocation_event *ev) +/*----------------------------------------------------------------*/ + +int sm_ll_insert(struct ll_disk *ll, dm_block_t b, + uint32_t ref_count, int32_t *nr_allocations) { int r; - uint32_t bit, old, ref_count; + uint32_t bit, old; struct dm_block *nb; dm_block_t index = b; struct disk_index_entry ie_disk; @@ -433,10 +435,9 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, return r; } ie_disk.blocknr = cpu_to_le64(dm_block_location(nb)); - bm_le = dm_bitmap_data(nb); - old = sm_lookup_bitmap(bm_le, bit); + old = sm_lookup_bitmap(bm_le, bit); if (old > 2) { r = sm_ll_lookup_big_ref_count(ll, b, &old); if (r < 0) { @@ -445,7 +446,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, } } - r = mutator(context, old, &ref_count); if (r) { dm_tm_unlock(ll->tm, nb); return r; @@ -453,7 +453,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, if (ref_count <= 2) { sm_set_bitmap(bm_le, bit, ref_count); - dm_tm_unlock(ll->tm, nb); if (old > 2) { @@ -480,62 +479,459 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, } if (ref_count && !old) { - *ev = SM_ALLOC; + *nr_allocations = 1; ll->nr_allocated++; le32_add_cpu(&ie_disk.nr_free, -1); if (le32_to_cpu(ie_disk.none_free_before) == bit) ie_disk.none_free_before = cpu_to_le32(bit + 1); } else if (old && !ref_count) { - *ev = SM_FREE; + *nr_allocations = -1; ll->nr_allocated--; le32_add_cpu(&ie_disk.nr_free, 1); ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); } else - *ev = SM_NONE; + *nr_allocations = 0; return ll->save_ie(ll, index, &ie_disk); } -static int set_ref_count(void *context, uint32_t old, uint32_t *new) +/*----------------------------------------------------------------*/ + +/* + * Holds useful intermediate results for the range based inc and dec + * operations. + */ +struct inc_context { + struct disk_index_entry ie_disk; + struct dm_block *bitmap_block; + void *bitmap; + + struct dm_block *overflow_leaf; +}; + +static inline void init_inc_context(struct inc_context *ic) +{ + ic->bitmap_block = NULL; + ic->bitmap = NULL; + ic->overflow_leaf = NULL; +} + +static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic) +{ + if (ic->bitmap_block) + dm_tm_unlock(ll->tm, ic->bitmap_block); + if (ic->overflow_leaf) + dm_tm_unlock(ll->tm, ic->overflow_leaf); +} + +static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic) +{ + exit_inc_context(ll, ic); + init_inc_context(ic); +} + +/* + * Confirms a btree node contains a particular key at an index. + */ +static bool contains_key(struct btree_node *n, uint64_t key, int index) +{ + return index >= 0 && + index < le32_to_cpu(n->header.nr_entries) && + le64_to_cpu(n->keys[index]) == key; +} + +static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) { - *new = *((uint32_t *) context); + int r; + int index; + struct btree_node *n; + __le32 *v_ptr; + uint32_t rc; + + /* + * bitmap_block needs to be unlocked because getting the + * overflow_leaf may need to allocate, and thus use the space map. + */ + reset_inc_context(ll, ic); + + r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root, + b, &index, &ll->ref_count_root, &ic->overflow_leaf); + if (r < 0) + return r; + + n = dm_block_data(ic->overflow_leaf); + + if (!contains_key(n, b, index)) { + DMERR("overflow btree is missing an entry"); + return -EINVAL; + } + + v_ptr = value_ptr(n, index); + rc = le32_to_cpu(*v_ptr) + 1; + *v_ptr = cpu_to_le32(rc); + return 0; } -int sm_ll_insert(struct ll_disk *ll, dm_block_t b, - uint32_t ref_count, enum allocation_event *ev) +static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) +{ + int index; + struct btree_node *n; + __le32 *v_ptr; + uint32_t rc; + + /* + * Do we already have the correct overflow leaf? + */ + if (ic->overflow_leaf) { + n = dm_block_data(ic->overflow_leaf); + index = lower_bound(n, b); + if (contains_key(n, b, index)) { + v_ptr = value_ptr(n, index); + rc = le32_to_cpu(*v_ptr) + 1; + *v_ptr = cpu_to_le32(rc); + + return 0; + } + } + + return __sm_ll_inc_overflow(ll, b, ic); +} + +static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic) +{ + int r, inc; + r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr), + &dm_sm_bitmap_validator, &ic->bitmap_block, &inc); + if (r < 0) { + DMERR("dm_tm_shadow_block() failed"); + return r; + } + ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block)); + ic->bitmap = dm_bitmap_data(ic->bitmap_block); + return 0; +} + +/* + * Once shadow_bitmap has been called, which always happens at the start of inc/dec, + * we can reopen the bitmap with a simple write lock, rather than re calling + * dm_tm_shadow_block(). + */ +static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic) +{ + if (!ic->bitmap_block) { + int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr), + &dm_sm_bitmap_validator, &ic->bitmap_block); + if (r) { + DMERR("unable to re-get write lock for bitmap"); + return r; + } + ic->bitmap = dm_bitmap_data(ic->bitmap_block); + } + + return 0; +} + +/* + * Loops round incrementing entries in a single bitmap. + */ +static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b, + uint32_t bit, uint32_t bit_end, + int32_t *nr_allocations, dm_block_t *new_b, + struct inc_context *ic) +{ + int r; + __le32 le_rc; + uint32_t old; + + for (; bit != bit_end; bit++, b++) { + /* + * We only need to drop the bitmap if we need to find a new btree + * leaf for the overflow. So if it was dropped last iteration, + * we now re-get it. + */ + r = ensure_bitmap(ll, ic); + if (r) + return r; + + old = sm_lookup_bitmap(ic->bitmap, bit); + switch (old) { + case 0: + /* inc bitmap, adjust nr_allocated */ + sm_set_bitmap(ic->bitmap, bit, 1); + (*nr_allocations)++; + ll->nr_allocated++; + le32_add_cpu(&ic->ie_disk.nr_free, -1); + if (le32_to_cpu(ic->ie_disk.none_free_before) == bit) + ic->ie_disk.none_free_before = cpu_to_le32(bit + 1); + break; + + case 1: + /* inc bitmap */ + sm_set_bitmap(ic->bitmap, bit, 2); + break; + + case 2: + /* inc bitmap and insert into overflow */ + sm_set_bitmap(ic->bitmap, bit, 3); + reset_inc_context(ll, ic); + + le_rc = cpu_to_le32(3); + __dm_bless_for_disk(&le_rc); + r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root, + &b, &le_rc, &ll->ref_count_root); + if (r < 0) { + DMERR("ref count insert failed"); + return r; + } + break; + + default: + /* + * inc within the overflow tree only. + */ + r = sm_ll_inc_overflow(ll, b, ic); + if (r < 0) + return r; + } + } + + *new_b = b; + return 0; +} + +/* + * Finds a bitmap that contains entries in the block range, and increments + * them. + */ +static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, + int32_t *nr_allocations, dm_block_t *new_b) { - return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev); + int r; + struct inc_context ic; + uint32_t bit, bit_end; + dm_block_t index = b; + + init_inc_context(&ic); + + bit = do_div(index, ll->entries_per_block); + r = ll->load_ie(ll, index, &ic.ie_disk); + if (r < 0) + return r; + + r = shadow_bitmap(ll, &ic); + if (r) + return r; + + bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); + r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic); + + exit_inc_context(ll, &ic); + + if (r) + return r; + + return ll->save_ie(ll, index, &ic.ie_disk); } -static int inc_ref_count(void *context, uint32_t old, uint32_t *new) +int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, + int32_t *nr_allocations) { - *new = old + 1; + *nr_allocations = 0; + while (b != e) { + int r = __sm_ll_inc(ll, b, e, nr_allocations, &b); + if (r) + return r; + } + return 0; } -int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +/*----------------------------------------------------------------*/ + +static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b, + struct inc_context *ic) { - return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev); + reset_inc_context(ll, ic); + return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root, + &b, &ll->ref_count_root); } -static int dec_ref_count(void *context, uint32_t old, uint32_t *new) +static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, + struct inc_context *ic, uint32_t *old_rc) { - if (!old) { - DMERR_LIMIT("unable to decrement a reference count below 0"); + int r; + int index = -1; + struct btree_node *n; + __le32 *v_ptr; + uint32_t rc; + + reset_inc_context(ll, ic); + r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root, + b, &index, &ll->ref_count_root, &ic->overflow_leaf); + if (r < 0) + return r; + + n = dm_block_data(ic->overflow_leaf); + + if (!contains_key(n, b, index)) { + DMERR("overflow btree is missing an entry"); return -EINVAL; } - *new = old - 1; + v_ptr = value_ptr(n, index); + rc = le32_to_cpu(*v_ptr); + *old_rc = rc; + + if (rc == 3) { + return __sm_ll_del_overflow(ll, b, ic); + } else { + rc--; + *v_ptr = cpu_to_le32(rc); + return 0; + } +} + +static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, + struct inc_context *ic, uint32_t *old_rc) +{ + /* + * Do we already have the correct overflow leaf? + */ + if (ic->overflow_leaf) { + int index; + struct btree_node *n; + __le32 *v_ptr; + uint32_t rc; + + n = dm_block_data(ic->overflow_leaf); + index = lower_bound(n, b); + if (contains_key(n, b, index)) { + v_ptr = value_ptr(n, index); + rc = le32_to_cpu(*v_ptr); + *old_rc = rc; + + if (rc > 3) { + rc--; + *v_ptr = cpu_to_le32(rc); + return 0; + } else { + return __sm_ll_del_overflow(ll, b, ic); + } + + } + } + + return __sm_ll_dec_overflow(ll, b, ic, old_rc); +} + +/* + * Loops round incrementing entries in a single bitmap. + */ +static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b, + uint32_t bit, uint32_t bit_end, + struct inc_context *ic, + int32_t *nr_allocations, dm_block_t *new_b) +{ + int r; + uint32_t old; + + for (; bit != bit_end; bit++, b++) { + /* + * We only need to drop the bitmap if we need to find a new btree + * leaf for the overflow. So if it was dropped last iteration, + * we now re-get it. + */ + r = ensure_bitmap(ll, ic); + if (r) + return r; + + old = sm_lookup_bitmap(ic->bitmap, bit); + switch (old) { + case 0: + DMERR("unable to decrement block"); + return -EINVAL; + + case 1: + /* dec bitmap */ + sm_set_bitmap(ic->bitmap, bit, 0); + (*nr_allocations)--; + ll->nr_allocated--; + le32_add_cpu(&ic->ie_disk.nr_free, 1); + ic->ie_disk.none_free_before = + cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit)); + break; + + case 2: + /* dec bitmap and insert into overflow */ + sm_set_bitmap(ic->bitmap, bit, 1); + break; + + case 3: + r = sm_ll_dec_overflow(ll, b, ic, &old); + if (r < 0) + return r; + + if (old == 3) { + r = ensure_bitmap(ll, ic); + if (r) + return r; + + sm_set_bitmap(ic->bitmap, bit, 2); + } + break; + } + } + + *new_b = b; return 0; } -int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, + int32_t *nr_allocations, dm_block_t *new_b) +{ + int r; + uint32_t bit, bit_end; + struct inc_context ic; + dm_block_t index = b; + + init_inc_context(&ic); + + bit = do_div(index, ll->entries_per_block); + r = ll->load_ie(ll, index, &ic.ie_disk); + if (r < 0) + return r; + + r = shadow_bitmap(ll, &ic); + if (r) + return r; + + bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); + r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b); + exit_inc_context(ll, &ic); + + if (r) + return r; + + return ll->save_ie(ll, index, &ic.ie_disk); +} + +int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, + int32_t *nr_allocations) { - return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev); + *nr_allocations = 0; + while (b != e) { + int r = __sm_ll_dec(ll, b, e, nr_allocations, &b); + if (r) + return r; + } + + return 0; } +/*----------------------------------------------------------------*/ + int sm_ll_commit(struct ll_disk *ll) { int r = 0; diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 87e17909ef52..4a22183e78b7 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -96,12 +96,6 @@ struct disk_bitmap_header { __le64 blocknr; } __attribute__ ((packed, aligned(8))); -enum allocation_event { - SM_NONE, - SM_ALLOC, - SM_FREE, -}; - /*----------------------------------------------------------------*/ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks); @@ -111,9 +105,15 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, dm_block_t begin, dm_block_t end, dm_block_t *result); -int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); -int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); -int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); + +/* + * The next three functions return (via nr_allocations) the net number of + * allocations that were made. This number may be negative if there were + * more frees than allocs. + */ +int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, int32_t *nr_allocations); +int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations); +int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations); int sm_ll_commit(struct ll_disk *ll); int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm); diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 4f8069bb0481..d0a8d5e73c28 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -87,76 +87,39 @@ static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) { int r; - uint32_t old_count; - enum allocation_event ev; + int32_t nr_allocations; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - r = sm_ll_insert(&smd->ll, b, count, &ev); + r = sm_ll_insert(&smd->ll, b, count, &nr_allocations); if (!r) { - switch (ev) { - case SM_NONE: - break; - - case SM_ALLOC: - /* - * This _must_ be free in the prior transaction - * otherwise we've lost atomicity. - */ - smd->nr_allocated_this_transaction++; - break; - - case SM_FREE: - /* - * It's only free if it's also free in the last - * transaction. - */ - r = sm_ll_lookup(&smd->old_ll, b, &old_count); - if (r) - return r; - - if (!old_count) - smd->nr_allocated_this_transaction--; - break; - } + smd->nr_allocated_this_transaction += nr_allocations; } return r; } -static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) +static int sm_disk_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) { int r; - enum allocation_event ev; + int32_t nr_allocations; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - r = sm_ll_inc(&smd->ll, b, &ev); - if (!r && (ev == SM_ALLOC)) - /* - * This _must_ be free in the prior transaction - * otherwise we've lost atomicity. - */ - smd->nr_allocated_this_transaction++; + r = sm_ll_inc(&smd->ll, b, e, &nr_allocations); + if (!r) + smd->nr_allocated_this_transaction += nr_allocations; return r; } -static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) +static int sm_disk_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) { int r; - uint32_t old_count; - enum allocation_event ev; + int32_t nr_allocations; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - r = sm_ll_dec(&smd->ll, b, &ev); - if (!r && (ev == SM_FREE)) { - /* - * It's only free if it's also free in the last - * transaction. - */ - r = sm_ll_lookup(&smd->old_ll, b, &old_count); - if (!r && !old_count) - smd->nr_allocated_this_transaction--; - } + r = sm_ll_dec(&smd->ll, b, e, &nr_allocations); + if (!r) + smd->nr_allocated_this_transaction += nr_allocations; return r; } @@ -164,7 +127,7 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) { int r; - enum allocation_event ev; + int32_t nr_allocations; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); /* @@ -183,10 +146,9 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) return r; smd->begin = *b + 1; - r = sm_ll_inc(&smd->ll, *b, &ev); + r = sm_ll_inc(&smd->ll, *b, *b + 1, &nr_allocations); if (!r) { - BUG_ON(ev != SM_ALLOC); - smd->nr_allocated_this_transaction++; + smd->nr_allocated_this_transaction += nr_allocations; } return r; @@ -242,8 +204,8 @@ static struct dm_space_map ops = { .get_count = sm_disk_get_count, .count_is_more_than_one = sm_disk_count_is_more_than_one, .set_count = sm_disk_set_count, - .inc_block = sm_disk_inc_block, - .dec_block = sm_disk_dec_block, + .inc_blocks = sm_disk_inc_blocks, + .dec_blocks = sm_disk_dec_blocks, .new_block = sm_disk_new_block, .commit = sm_disk_commit, .root_size = sm_disk_root_size, diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index da439ac85796..392ae26134a4 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -89,7 +89,8 @@ enum block_op_type { struct block_op { enum block_op_type type; - dm_block_t block; + dm_block_t b; + dm_block_t e; }; struct bop_ring_buffer { @@ -116,7 +117,7 @@ static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) } static int brb_push(struct bop_ring_buffer *brb, - enum block_op_type type, dm_block_t b) + enum block_op_type type, dm_block_t b, dm_block_t e) { struct block_op *bop; unsigned next = brb_next(brb, brb->end); @@ -130,7 +131,8 @@ static int brb_push(struct bop_ring_buffer *brb, bop = brb->bops + brb->end; bop->type = type; - bop->block = b; + bop->b = b; + bop->e = e; brb->end = next; @@ -145,9 +147,7 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) return -ENODATA; bop = brb->bops + brb->begin; - result->type = bop->type; - result->block = bop->block; - + memcpy(result, bop, sizeof(*result)); return 0; } @@ -178,10 +178,9 @@ struct sm_metadata { struct threshold threshold; }; -static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) +static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e) { - int r = brb_push(&smm->uncommitted, type, b); - + int r = brb_push(&smm->uncommitted, type, b, e); if (r) { DMERR("too many recursive allocations"); return -ENOMEM; @@ -193,15 +192,15 @@ static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t static int commit_bop(struct sm_metadata *smm, struct block_op *op) { int r = 0; - enum allocation_event ev; + int32_t nr_allocations; switch (op->type) { case BOP_INC: - r = sm_ll_inc(&smm->ll, op->block, &ev); + r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations); break; case BOP_DEC: - r = sm_ll_dec(&smm->ll, op->block, &ev); + r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations); break; } @@ -314,7 +313,7 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, i = brb_next(&smm->uncommitted, i)) { struct block_op *op = smm->uncommitted.bops + i; - if (op->block != b) + if (b < op->b || b >= op->e) continue; switch (op->type) { @@ -355,7 +354,7 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, struct block_op *op = smm->uncommitted.bops + i; - if (op->block != b) + if (b < op->b || b >= op->e) continue; switch (op->type) { @@ -393,7 +392,7 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) { int r, r2; - enum allocation_event ev; + int32_t nr_allocations; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); if (smm->recursion_count) { @@ -402,40 +401,42 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, } in(smm); - r = sm_ll_insert(&smm->ll, b, count, &ev); + r = sm_ll_insert(&smm->ll, b, count, &nr_allocations); r2 = out(smm); return combine_errors(r, r2); } -static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b) +static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) { int r, r2 = 0; - enum allocation_event ev; + int32_t nr_allocations; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - if (recursing(smm)) - r = add_bop(smm, BOP_INC, b); - else { + if (recursing(smm)) { + r = add_bop(smm, BOP_INC, b, e); + if (r) + return r; + } else { in(smm); - r = sm_ll_inc(&smm->ll, b, &ev); + r = sm_ll_inc(&smm->ll, b, e, &nr_allocations); r2 = out(smm); } return combine_errors(r, r2); } -static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) +static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) { int r, r2 = 0; - enum allocation_event ev; + int32_t nr_allocations; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); if (recursing(smm)) - r = add_bop(smm, BOP_DEC, b); + r = add_bop(smm, BOP_DEC, b, e); else { in(smm); - r = sm_ll_dec(&smm->ll, b, &ev); + r = sm_ll_dec(&smm->ll, b, e, &nr_allocations); r2 = out(smm); } @@ -445,7 +446,7 @@ static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) { int r, r2 = 0; - enum allocation_event ev; + int32_t nr_allocations; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); /* @@ -466,10 +467,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) smm->begin = *b + 1; if (recursing(smm)) - r = add_bop(smm, BOP_INC, *b); + r = add_bop(smm, BOP_INC, *b, *b + 1); else { in(smm); - r = sm_ll_inc(&smm->ll, *b, &ev); + r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations); r2 = out(smm); } @@ -563,8 +564,8 @@ static const struct dm_space_map ops = { .get_count = sm_metadata_get_count, .count_is_more_than_one = sm_metadata_count_is_more_than_one, .set_count = sm_metadata_set_count, - .inc_block = sm_metadata_inc_block, - .dec_block = sm_metadata_dec_block, + .inc_blocks = sm_metadata_inc_blocks, + .dec_blocks = sm_metadata_dec_blocks, .new_block = sm_metadata_new_block, .commit = sm_metadata_commit, .root_size = sm_metadata_root_size, @@ -648,18 +649,28 @@ static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b) return 0; } -static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b) +static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) { + int r; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - return add_bop(smm, BOP_INC, b); + r = add_bop(smm, BOP_INC, b, e); + if (r) + return r; + + return 0; } -static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b) +static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) { + int r; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - return add_bop(smm, BOP_DEC, b); + r = add_bop(smm, BOP_DEC, b, e); + if (r) + return r; + + return 0; } static int sm_bootstrap_commit(struct dm_space_map *sm) @@ -690,8 +701,8 @@ static const struct dm_space_map bootstrap_ops = { .get_count = sm_bootstrap_get_count, .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, .set_count = sm_bootstrap_set_count, - .inc_block = sm_bootstrap_inc_block, - .dec_block = sm_bootstrap_dec_block, + .inc_blocks = sm_bootstrap_inc_blocks, + .dec_blocks = sm_bootstrap_dec_blocks, .new_block = sm_bootstrap_new_block, .commit = sm_bootstrap_commit, .root_size = sm_bootstrap_root_size, @@ -703,7 +714,7 @@ static const struct dm_space_map bootstrap_ops = { static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) { - int r, i; + int r; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); dm_block_t old_len = smm->ll.nr_blocks; @@ -725,9 +736,7 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) * allocate any new blocks. */ do { - for (i = old_len; !r && i < smm->begin; i++) - r = add_bop(smm, BOP_INC, i); - + r = add_bop(smm, BOP_INC, old_len, smm->begin); if (r) goto out; @@ -774,7 +783,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm, dm_block_t superblock) { int r; - dm_block_t i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); smm->begin = superblock + 1; @@ -799,9 +807,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm, * Now we need to update the newly created data structures with the * allocated blocks that they were built from. */ - for (i = superblock; !r && i < smm->begin; i++) - r = add_bop(smm, BOP_INC, i); - + r = add_bop(smm, BOP_INC, superblock, smm->begin); if (r) return r; diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h index 3e6d1153b7c4..a015cd11f6e9 100644 --- a/drivers/md/persistent-data/dm-space-map.h +++ b/drivers/md/persistent-data/dm-space-map.h @@ -46,8 +46,8 @@ struct dm_space_map { int (*commit)(struct dm_space_map *sm); - int (*inc_block)(struct dm_space_map *sm, dm_block_t b); - int (*dec_block)(struct dm_space_map *sm, dm_block_t b); + int (*inc_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e); + int (*dec_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e); /* * new_block will increment the returned block. @@ -117,14 +117,24 @@ static inline int dm_sm_commit(struct dm_space_map *sm) return sm->commit(sm); } +static inline int dm_sm_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) +{ + return sm->inc_blocks(sm, b, e); +} + static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b) { - return sm->inc_block(sm, b); + return dm_sm_inc_blocks(sm, b, b + 1); +} + +static inline int dm_sm_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) +{ + return sm->dec_blocks(sm, b, e); } static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b) { - return sm->dec_block(sm, b); + return dm_sm_dec_blocks(sm, b, b + 1); } static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b) diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 4353e1146d73..16643fc974e8 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -359,6 +359,17 @@ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b) } EXPORT_SYMBOL_GPL(dm_tm_inc); +void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e) +{ + /* + * The non-blocking clone doesn't support this. + */ + BUG_ON(tm->is_clone); + + dm_sm_inc_blocks(tm->sm, b, e); +} +EXPORT_SYMBOL_GPL(dm_tm_inc_range); + void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b) { /* @@ -370,6 +381,47 @@ void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b) } EXPORT_SYMBOL_GPL(dm_tm_dec); +void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e) +{ + /* + * The non-blocking clone doesn't support this. + */ + BUG_ON(tm->is_clone); + + dm_sm_dec_blocks(tm->sm, b, e); +} +EXPORT_SYMBOL_GPL(dm_tm_dec_range); + +void dm_tm_with_runs(struct dm_transaction_manager *tm, + const __le64 *value_le, unsigned count, dm_tm_run_fn fn) +{ + uint64_t b, begin, end; + bool in_run = false; + unsigned i; + + for (i = 0; i < count; i++, value_le++) { + b = le64_to_cpu(*value_le); + + if (in_run) { + if (b == end) + end++; + else { + fn(tm, begin, end); + begin = b; + end = b + 1; + } + } else { + in_run = true; + begin = b; + end = b + 1; + } + } + + if (in_run) + fn(tm, begin, end); +} +EXPORT_SYMBOL_GPL(dm_tm_with_runs); + int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result) { diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 3d75cc59bbb8..906c02ed0365 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -100,8 +100,18 @@ void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b); * Functions for altering the reference count of a block directly. */ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b); - +void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e); void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b); +void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e); + +/* + * Builds up runs of adjacent blocks, and then calls the given fn + * (typically dm_tm_inc/dec). Very useful when you have to perform + * the same tm operation on all values in a btree leaf. + */ +typedef void (*dm_tm_run_fn)(struct dm_transaction_manager *, dm_block_t, dm_block_t); +void dm_tm_with_runs(struct dm_transaction_manager *tm, + const __le64 *value_le, unsigned count, dm_tm_run_fn fn); int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result); -- cgit