Diffstat (limited to 'drivers/md/bcache/bset.c')
| -rw-r--r-- | drivers/md/bcache/bset.c | 346 |
1 files changed, 202 insertions, 144 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 18526d44688d..463eb13bd0b2 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Code for working with individual keys, and sorted sets of keys with in a
  * btree node
@@ -5,7 +6,7 @@
  * Copyright 2012 Google, Inc.
  */
 
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
 
 #include "util.h"
 #include "bset.h"
@@ -17,31 +18,31 @@
 
 #ifdef CONFIG_BCACHE_DEBUG
 
-void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
 {
 	struct bkey *k, *next;
 
 	for (k = i->start; k < bset_bkey_last(i); k = next) {
 		next = bkey_next(k);
 
-		printk(KERN_ERR "block %u key %u/%u: ", set,
-		       (unsigned) ((u64 *) k - i->d), i->keys);
+		pr_err("block %u key %u/%u: ", set,
+		       (unsigned int) ((u64 *) k - i->d), i->keys);
 
 		if (b->ops->key_dump)
 			b->ops->key_dump(b, k);
 		else
-			printk("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+			pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
 
 		if (next < bset_bkey_last(i) &&
 		    bkey_cmp(k, b->ops->is_extents ?
 			     &START_KEY(next) : next) > 0)
-			printk(KERN_ERR "Key skipped backwards\n");
+			pr_err("Key skipped backwards\n");
 	}
 }
 
 void bch_dump_bucket(struct btree_keys *b)
 {
-	unsigned i;
+	unsigned int i;
 
 	console_lock();
 	for (i = 0; i <= b->nsets; i++)
@@ -52,8 +53,8 @@ void bch_dump_bucket(struct btree_keys *b)
 
 int __bch_count_data(struct btree_keys *b)
 {
-	unsigned ret = 0;
-	struct btree_iter iter;
+	unsigned int ret = 0;
+	struct btree_iter_stack iter;
 	struct bkey *k;
 
 	if (b->ops->is_extents)
@@ -66,7 +67,7 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
 {
 	va_list args;
 	struct bkey *k, *p = NULL;
-	struct btree_iter iter;
+	struct btree_iter_stack iter;
 	const char *err;
 
 	for_each_key(b, k, &iter) {
@@ -127,7 +128,7 @@ static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
 
 /* Keylists */
 
-int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
 {
 	size_t oldsize = bch_keylist_nkeys(l);
 	size_t newsize = oldsize + u64s;
@@ -154,6 +155,7 @@ int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
 	return 0;
 }
 
+/* Pop the top key of keylist by pointing l->top to its previous key */
 struct bkey *bch_keylist_pop(struct keylist *l)
 {
 	struct bkey *k = l->keys;
@@ -167,6 +169,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
 	return l->top = k;
 }
 
+/* Pop the bottom key of keylist and update l->top_p */
 void bch_keylist_pop_front(struct keylist *l)
 {
 	l->top_p -= bkey_u64s(l->keys);
@@ -179,7 +182,7 @@ void bch_keylist_pop_front(struct keylist *l)
 /* Key/pointer manipulation */
 
 void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
-			      unsigned i)
+			      unsigned int i)
 {
 	BUG_ON(i > KEY_PTRS(src));
 
@@ -193,7 +196,7 @@ void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
 
 bool __bch_cut_front(const struct bkey *where, struct bkey *k)
 {
-	unsigned i, len = 0;
+	unsigned int i, len = 0;
 
 	if (bkey_cmp(where, &START_KEY(k)) <= 0)
 		return false;
@@ -213,7 +216,7 @@ bool __bch_cut_front(const struct bkey *where, struct bkey *k)
 
 bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 {
-	unsigned len = 0;
+	unsigned int len = 0;
 
 	if (bkey_cmp(where, k) >= 0)
 		return false;
@@ -239,9 +242,9 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 #define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
 
 struct bkey_float {
-	unsigned	exponent:BKEY_EXPONENT_BITS;
-	unsigned	m:BKEY_MID_BITS;
-	unsigned	mantissa:BKEY_MANTISSA_BITS;
+	unsigned int	exponent:BKEY_EXPONENT_BITS;
+	unsigned int	m:BKEY_MID_BITS;
+	unsigned int	mantissa:BKEY_MANTISSA_BITS;
 } __packed;
 
 /*
@@ -308,9 +311,10 @@ void bch_btree_keys_free(struct btree_keys *b)
 	t->tree = NULL;
 	t->data = NULL;
 }
-EXPORT_SYMBOL(bch_btree_keys_free);
 
-int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
+int bch_btree_keys_alloc(struct btree_keys *b,
+			 unsigned int page_order,
+			 gfp_t gfp)
 {
 	struct bset_tree *t = b->set;
 
@@ -318,7 +322,7 @@ int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
 
 	b->page_order = page_order;
 
-	t->data = (void *) __get_free_pages(gfp, b->page_order);
+	t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order);
 	if (!t->data)
 		goto err;
 
@@ -339,33 +343,32 @@ err:
 	bch_btree_keys_free(b);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL(bch_btree_keys_alloc);
 
 void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
			 bool *expensive_debug_checks)
 {
-	unsigned i;
-
 	b->ops = ops;
 	b->expensive_debug_checks = expensive_debug_checks;
 	b->nsets = 0;
 	b->last_set_unwritten = 0;
 
-	/* XXX: shouldn't be needed */
-	for (i = 0; i < MAX_BSETS; i++)
-		b->set[i].size = 0;
 	/*
-	 * Second loop starts at 1 because b->keys[0]->data is the memory we
-	 * allocated
+	 * struct btree_keys is embedded in struct btree, and struct
+	 * bset_tree is embedded into struct btree_keys. They are all
+	 * initialized as 0 by kzalloc() in mca_bucket_alloc(), and
+	 * b->set[0].data is allocated in bch_btree_keys_alloc(), so we
+	 * don't have to initialize b->set[].size and b->set[].data here
+	 * any more.
 	 */
-	for (i = 1; i < MAX_BSETS; i++)
-		b->set[i].data = NULL;
 }
-EXPORT_SYMBOL(bch_btree_keys_init);
 
 /* Binary tree stuff for auxiliary search trees */
 
-static unsigned inorder_next(unsigned j, unsigned size)
+/*
+ * return the array index next to j when doing an in-order traversal
+ * of a binary tree which is stored in a linear array
+ */
+static unsigned int inorder_next(unsigned int j, unsigned int size)
 {
 	if (j * 2 + 1 < size) {
 		j = j * 2 + 1;
@@ -378,7 +381,11 @@ static unsigned inorder_next(unsigned j, unsigned size)
 	return j;
 }
 
-static unsigned inorder_prev(unsigned j, unsigned size)
+/*
+ * return the array index previous to j when doing an in-order traversal
+ * of a binary tree which is stored in a linear array
+ */
+static unsigned int inorder_prev(unsigned int j, unsigned int size)
 {
 	if (j * 2 < size) {
 		j = j * 2;
@@ -391,7 +398,8 @@ static unsigned inorder_prev(unsigned j, unsigned size)
 	return j;
 }
 
-/* I have no idea why this code works... and I'm the one who wrote it
+/*
+ * I have no idea why this code works... and I'm the one who wrote it
  *
  * However, I do know what it does:
  * Given a binary tree constructed in an array (i.e. how you normally implement
@@ -404,10 +412,12 @@ static unsigned inorder_prev(unsigned j, unsigned size)
  * extra is a function of size:
  *   extra = (size - rounddown_pow_of_two(size - 1)) << 1;
  */
-static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
+static unsigned int __to_inorder(unsigned int j,
+				 unsigned int size,
+				 unsigned int extra)
 {
-	unsigned b = fls(j);
-	unsigned shift = fls(size - 1) - b;
+	unsigned int b = fls(j);
+	unsigned int shift = fls(size - 1) - b;
 
 	j ^= 1U << (b - 1);
 	j <<= 1;
@@ -420,14 +430,20 @@ static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
 	return j;
 }
 
-static unsigned to_inorder(unsigned j, struct bset_tree *t)
+/*
+ * Return the cacheline index in bset_tree->data, where j is the index
+ * from the linear array which stores the auxiliary binary tree
+ */
+static unsigned int to_inorder(unsigned int j, struct bset_tree *t)
 {
 	return __to_inorder(j, t->size, t->extra);
 }
 
-static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
+static unsigned int __inorder_to_tree(unsigned int j,
				      unsigned int size,
				      unsigned int extra)
 {
-	unsigned shift;
+	unsigned int shift;
 
 	if (j > extra)
 		j += j - extra;
@@ -440,7 +456,11 @@ static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
 	return j;
 }
 
-static unsigned inorder_to_tree(unsigned j, struct bset_tree *t)
+/*
+ * Return an index from the linear array which stores the auxiliary binary
+ * tree, j is the cacheline index of t->data.
+ */
+static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
 {
 	return __inorder_to_tree(j, t->size, t->extra);
 }
@@ -451,14 +471,15 @@ void inorder_test(void)
 	unsigned long done = 0;
 	ktime_t start = ktime_get();
 
-	for (unsigned size = 2;
+	for (unsigned int size = 2;
	     size < 65536000;
	     size++) {
-		unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
-		unsigned i = 1, j = rounddown_pow_of_two(size - 1);
+		unsigned int extra =
+			(size - rounddown_pow_of_two(size - 1)) << 1;
+		unsigned int i = 1, j = rounddown_pow_of_two(size - 1);
 
 		if (!(size % 4096))
-			printk(KERN_NOTICE "loop %u, %llu per us\n", size,
+			pr_notice("loop %u, %llu per us\n", size,
			       done / ktime_us_delta(ktime_get(), start));
 
 		while (1) {
@@ -501,30 +522,31 @@ void inorder_test(void)
  * of the previous key so we can walk backwards to it from t->tree[j]'s key.
  */
 
-static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
-				      unsigned offset)
+static struct bkey *cacheline_to_bkey(struct bset_tree *t,
+				      unsigned int cacheline,
+				      unsigned int offset)
 {
 	return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
 }
 
-static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
 {
 	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
 }
 
-static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
-					 unsigned cacheline,
+static unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
+					 unsigned int cacheline,
					 struct bkey *k)
 {
 	return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
 }
 
-static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
 {
 	return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
 }
 
-static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
 {
 	return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
 }
@@ -533,7 +555,7 @@ static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
  * For the write set - the one we're currently inserting keys into - we don't
  * maintain a full search tree, we just keep a simple lookup table in t->prev.
  */
-static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
+static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
 {
 	return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
 }
@@ -545,14 +567,29 @@ static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
 	return low;
 }
 
-static inline unsigned bfloat_mantissa(const struct bkey *k,
+/*
+ * Calculate mantissa value for struct bkey_float.
+ * If the most significant bit of f->exponent is not set, then
+ *  - f->exponent >> 6 is 0
+ *  - p[0] points to bkey->low
+ *  - p[-1] borrows bits from KEY_INODE() of bkey->high
+ * If the most significant bit of f->exponent is set, then
+ *  - f->exponent >> 6 is 1
+ *  - p[0] points to bits from KEY_INODE() of bkey->high
+ *  - p[-1] points to other bits from KEY_INODE() of
+ *    bkey->high too.
+ * See make_bfloat() to check when the most significant bit of f->exponent
+ * is set or not.
+ */
+static inline unsigned int bfloat_mantissa(const struct bkey *k,
				       struct bkey_float *f)
 {
 	const uint64_t *p = &k->low - (f->exponent >> 6);
+
 	return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
 }
 
-static void make_bfloat(struct bset_tree *t, unsigned j)
+static void make_bfloat(struct bset_tree *t, unsigned int j)
 {
 	struct bkey_float *f = &t->tree[j];
 	struct bkey *m = tree_to_bkey(t, j);
@@ -569,6 +606,16 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
 	BUG_ON(m < l || m > r);
 	BUG_ON(bkey_next(p) != m);
 
+	/*
+	 * If l and r have different KEY_INODE values (different backing
+	 * device), f->exponent records how many least significant bits
+	 * are different in KEY_INODE values and sets most significant
+	 * bits to 1 (by +64).
+	 * If l and r have the same KEY_INODE value, f->exponent records
+	 * how many different bits in least significant bits of bkey->low.
+	 * See bfloat_mantissa() for how the most significant bit of
+	 * f->exponent is used to calculate the bfloat mantissa value.
+	 */
 	if (KEY_INODE(l) != KEY_INODE(r))
 		f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64;
 	else
@@ -590,7 +637,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
 static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
 {
 	if (t != b->set) {
-		unsigned j = roundup(t[-1].size,
+		unsigned int j = roundup(t[-1].size,
				     64 / sizeof(struct bkey_float));
 
 		t->tree = t[-1].tree + j;
@@ -630,19 +677,27 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
 
 	bch_bset_build_unwritten_tree(b);
 }
-EXPORT_SYMBOL(bch_bset_init_next);
 
+/*
+ * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
+ * accelerate bkey search in a btree node (pointed by bset_tree->data in
+ * memory). After searching the auxiliary tree by calling bset_search_tree(),
+ * a struct bset_search_iter is returned which indicates range [l, r] from
+ * bset_tree->data where the searched bkey might be inside. Then a following
+ * linear comparison does the exact search, see __bch_bset_search() for how
+ * the auxiliary tree is used.
+ */
 void bch_bset_build_written_tree(struct btree_keys *b)
 {
 	struct bset_tree *t = bset_tree_last(b);
 	struct bkey *prev = NULL, *k = t->data->start;
-	unsigned j, cacheline = 1;
+	unsigned int j, cacheline = 1;
 
 	b->last_set_unwritten = 0;
 
 	bset_alloc_tree(b, t);
 
-	t->size = min_t(unsigned,
+	t->size = min_t(unsigned int,
			bkey_to_cacheline(t, bset_bkey_last(t->data)),
			b->set->tree + btree_keys_cachelines(b) - t->tree);
 
@@ -657,8 +712,10 @@ void bch_bset_build_written_tree(struct btree_keys *b)
 	for (j = inorder_next(0, t->size);
	     j;
	     j = inorder_next(j, t->size)) {
-		while (bkey_to_cacheline(t, k) < cacheline)
-			prev = k, k = bkey_next(k);
+		while (bkey_to_cacheline(t, k) < cacheline) {
+			prev = k;
+			k = bkey_next(k);
+		}
 
 		t->prev[j] = bkey_u64s(prev);
 		t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
@@ -675,14 +732,13 @@ void bch_bset_build_written_tree(struct btree_keys *b)
	     j = inorder_next(j, t->size))
 		make_bfloat(t, j);
 }
-EXPORT_SYMBOL(bch_bset_build_written_tree);
 
 /* Insert */
 
 void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
 {
 	struct bset_tree *t;
-	unsigned inorder, j = 1;
+	unsigned int inorder, j = 1;
 
 	for (t = b->set; t <= bset_tree_last(b); t++)
 		if (k < bset_bkey_last(t->data))
@@ -723,20 +779,20 @@ fix_right:	do {
			j = j * 2 + 1;
		} while (j < t->size);
 }
-EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
 
 static void bch_bset_fix_lookup_table(struct btree_keys *b,
				      struct bset_tree *t,
				      struct bkey *k)
 {
-	unsigned shift = bkey_u64s(k);
-	unsigned j = bkey_to_cacheline(t, k);
+	unsigned int shift = bkey_u64s(k);
+	unsigned int j = bkey_to_cacheline(t, k);
 
 	/* We're getting called from btree_split() or btree_gc, just bail out */
 	if (!t->size)
 		return;
 
-	/* k is the key we just inserted; we need to find the entry in the
+	/*
+	 * k is the key we just inserted; we need to find the entry in the
	 * lookup table for the first key that is strictly greater than k:
	 * it's either k's cacheline or the next one
	 */
@@ -744,7 +800,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
	       table_to_bkey(t, j) <= k)
		j++;
 
-	/* Adjust all the lookup table entries, and find a new key for any that
+	/*
+	 * Adjust all the lookup table entries, and find a new key for any that
	 * have gotten too big
	 */
	for (; j < t->size; j++) {
@@ -769,7 +826,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
	     k != bset_bkey_last(t->data);
	     k = bkey_next(k))
		if (t->size == bkey_to_cacheline(t, k)) {
-			t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
+			t->prev[t->size] =
+				bkey_to_cacheline_offset(t, t->size, k);
			t->size++;
		}
 }
@@ -795,7 +853,6 @@ bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
 
	return b->ops->key_merge(b, l, r);
 }
-EXPORT_SYMBOL(bch_bkey_try_merge);
 
 void bch_bset_insert(struct btree_keys *b, struct bkey *where,
		     struct bkey *insert)
@@ -815,30 +872,41 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
	bkey_copy(where, insert);
	bch_bset_fix_lookup_table(b, t, where);
 }
-EXPORT_SYMBOL(bch_bset_insert);
 
-unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
			      struct bkey *replace_key)
 {
-	unsigned status = BTREE_INSERT_STATUS_NO_INSERT;
+	unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
	struct bset *i = bset_tree_last(b)->data;
	struct bkey *m, *prev = NULL;
-	struct btree_iter iter;
+	struct btree_iter_stack iter;
+	struct bkey preceding_key_on_stack = ZERO_KEY;
+	struct bkey *preceding_key_p = &preceding_key_on_stack;
 
	BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
 
-	m = bch_btree_iter_init(b, &iter, b->ops->is_extents
-				? PRECEDING_KEY(&START_KEY(k))
-				: PRECEDING_KEY(k));
+	/*
+	 * If k has a preceding key, preceding_key_p will be set to the address
+	 * of k's preceding key; otherwise preceding_key_p will be set
+	 * to NULL inside preceding_key().
+	 */
+	if (b->ops->is_extents)
+		preceding_key(&START_KEY(k), &preceding_key_p);
+	else
+		preceding_key(k, &preceding_key_p);
+
+	m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
 
-	if (b->ops->insert_fixup(b, k, &iter, replace_key))
+	if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
		return status;
 
	status = BTREE_INSERT_STATUS_INSERT;
 
	while (m != bset_bkey_last(i) &&
-	       bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
-		prev = m, m = bkey_next(m);
+	       bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0) {
+		prev = m;
+		m = bkey_next(m);
+	}
 
	/* prev is in the tree, if we merge we're done */
	status = BTREE_INSERT_STATUS_BACK_MERGE;
@@ -861,7 +929,6 @@ copy:	bkey_copy(m, k);
 merged:
	return status;
 }
-EXPORT_SYMBOL(bch_btree_insert_key);
 
 /* Lookup */
 
@@ -872,10 +939,10 @@ struct bset_search_iter {
 static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
						     const struct bkey *search)
 {
-	unsigned li = 0, ri = t->size;
+	unsigned int li = 0, ri = t->size;
 
	while (li + 1 != ri) {
-		unsigned m = (li + ri) >> 1;
+		unsigned int m = (li + ri) >> 1;
 
		if (bkey_cmp(table_to_bkey(t, m), search) > 0)
			ri = m;
@@ -894,33 +961,28 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
 {
	struct bkey *l, *r;
	struct bkey_float *f;
-	unsigned inorder, j, n = 1;
+	unsigned int inorder, j, n = 1;
 
	do {
-		unsigned p = n << 4;
-		p &= ((int) (p - t->size)) >> 31;
+		unsigned int p = n << 4;
 
-		prefetch(&t->tree[p]);
+		if (p < t->size)
+			prefetch(&t->tree[p]);
 
		j = n;
		f = &t->tree[j];
 
-		/*
-		 * n = (f->mantissa > bfloat_mantissa())
-		 *	? j * 2
-		 *	: j * 2 + 1;
-		 *
-		 * We need to subtract 1 from f->mantissa for the sign bit trick
-		 * to work - that's done in make_bfloat()
-		 */
-		if (likely(f->exponent != 127))
-			n = j * 2 + (((unsigned)
-				      (f->mantissa -
-				       bfloat_mantissa(search, f))) >> 31);
-		else
-			n = (bkey_cmp(tree_to_bkey(t, j), search) > 0)
-				? j * 2
-				: j * 2 + 1;
+		if (likely(f->exponent != 127)) {
+			if (f->mantissa >= bfloat_mantissa(search, f))
+				n = j * 2;
+			else
+				n = j * 2 + 1;
+		} else {
+			if (bkey_cmp(tree_to_bkey(t, j), search) > 0)
+				n = j * 2;
+			else
+				n = j * 2 + 1;
+		}
	} while (n < t->size);
 
	inorder = to_inorder(j, t);
@@ -1012,7 +1074,6 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
 
	return i.l;
 }
-EXPORT_SYMBOL(__bch_bset_search);
 
 /* Btree iterator */
 
@@ -1039,39 +1100,39 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
			 btree_iter_cmp));
 }
 
-static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
-					  struct btree_iter *iter,
-					  struct bkey *search,
-					  struct bset_tree *start)
+static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
+						struct btree_iter_stack *iter,
+						struct bkey *search,
+						struct bset_tree *start)
 {
	struct bkey *ret = NULL;
-	iter->size = ARRAY_SIZE(iter->data);
-	iter->used = 0;
+
+	iter->iter.size = ARRAY_SIZE(iter->stack_data);
+	iter->iter.used = 0;
 
 #ifdef CONFIG_BCACHE_DEBUG
-	iter->b = b;
+	iter->iter.b = b;
 #endif
 
	for (; start <= bset_tree_last(b); start++) {
		ret = bch_bset_search(b, start, search);
-		bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
+		bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
	}
 
	return ret;
 }
 
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
-				 struct btree_iter *iter,
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+				       struct btree_iter_stack *iter,
				 struct bkey *search)
 {
-	return __bch_btree_iter_init(b, iter, search, b->set);
+	return __bch_btree_iter_stack_init(b, iter, search, b->set);
 }
-EXPORT_SYMBOL(bch_btree_iter_init);
 
 static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
						 btree_iter_cmp_fn *cmp)
 {
-	struct btree_iter_set unused;
+	struct btree_iter_set b __maybe_unused;
	struct bkey *ret = NULL;
 
	if (!btree_iter_end(iter)) {
@@ -1086,7 +1147,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
		}
 
		if (iter->data->k == iter->data->end)
-			heap_pop(iter, unused, cmp);
+			heap_pop(iter, b, cmp);
		else
			heap_sift(iter, 0, cmp);
	}
@@ -1099,7 +1160,6 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
 {
	return __bch_btree_iter_next(iter, btree_iter_cmp);
 }
-EXPORT_SYMBOL(bch_btree_iter_next);
 
 struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
					struct btree_keys *b, ptr_filter_fn fn)
@@ -1117,24 +1177,19 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
 
 void bch_bset_sort_state_free(struct bset_sort_state *state)
 {
-	if (state->pool)
-		mempool_destroy(state->pool);
+	mempool_exit(&state->pool);
 }
 
-int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+			     unsigned int page_order)
 {
	spin_lock_init(&state->time.lock);
 
	state->page_order = page_order;
	state->crit_factor = int_sqrt(1 << page_order);
 
-	state->pool = mempool_create_page_pool(1, page_order);
-	if (!state->pool)
-		return -ENOMEM;
-
-	return 0;
+	return mempool_init_page_pool(&state->pool, 1, page_order);
 }
-EXPORT_SYMBOL(bch_bset_sort_state_init);
 
 static void btree_mergesort(struct btree_keys *b, struct bset *out,
			    struct btree_iter *iter,
@@ -1174,11 +1229,11 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
 
	out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
 
-	pr_debug("sorted %i keys", out->keys);
+	pr_debug("sorted %i keys\n", out->keys);
 }
 
 static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
-			 unsigned start, unsigned order, bool fixup,
+			 unsigned int start, unsigned int order, bool fixup,
			 struct bset_sort_state *state)
 {
	uint64_t start_time;
@@ -1190,7 +1245,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 
		BUG_ON(order > state->page_order);
 
-		outp = mempool_alloc(state->pool, GFP_NOIO);
+		outp = mempool_alloc(&state->pool, GFP_NOIO);
		out = page_address(outp);
		used_mempool = true;
		order = state->page_order;
@@ -1206,6 +1261,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
		 * Our temporary buffer is the same size as the btree node's
		 * buffer, we can just swap buffers instead of doing a big
		 * memcpy()
+		 *
+		 * Don't worry even if 'out' is allocated from mempool, it
+		 * can still be swapped here, because state->pool is a page
+		 * mempool created by mempool_init_page_pool(), which
+		 * allocates pages by alloc_pages() indeed.
		 */
 
		out->magic	= b->set->data->magic;
@@ -1219,7 +1279,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
	}
 
	if (used_mempool)
-		mempool_free(virt_to_page(out), state->pool);
+		mempool_free(virt_to_page(out), &state->pool);
	else
		free_pages((unsigned long) out, order);
 
@@ -1229,17 +1289,17 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
	bch_time_stats_update(&state->time, start_time);
 }
 
-void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
			    struct bset_sort_state *state)
 {
	size_t order = b->page_order, keys = 0;
-	struct btree_iter iter;
+	struct btree_iter_stack iter;
	int oldsize = bch_count_data(b);
 
-	__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
+	__bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
 
	if (start) {
-		unsigned i;
+		unsigned int i;
 
		for (i = start; i <= b->nsets; i++)
			keys += b->set[i].data->keys;
@@ -1247,11 +1307,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
		order = get_order(__set_bytes(b->set->data, keys));
	}
 
-	__btree_sort(b, &iter, start, order, false, state);
+	__btree_sort(b, &iter.iter, start, order, false, state);
 
	EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
 }
-EXPORT_SYMBOL(bch_btree_sort_partial);
 
 void bch_btree_sort_and_fix_extents(struct btree_keys *b,
				    struct btree_iter *iter,
@@ -1264,11 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
			 struct bset_sort_state *state)
 {
	uint64_t start_time = local_clock();
+	struct btree_iter_stack iter;
 
-	struct btree_iter iter;
-	bch_btree_iter_init(b, &iter, NULL);
+	bch_btree_iter_stack_init(b, &iter, NULL);
 
-	btree_mergesort(b, new->set->data, &iter, false, true);
+	btree_mergesort(b, new->set->data, &iter.iter, false, true);
 
	bch_time_stats_update(&state->time, start_time);
 
@@ -1279,7 +1338,7 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
 
 void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
 {
-	unsigned crit = SORT_CRIT;
+	unsigned int crit = SORT_CRIT;
	int i;
 
	/* Don't sort if nothing to do */
@@ -1304,11 +1363,10 @@ void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
 out:
	bch_bset_build_written_tree(b);
 }
-EXPORT_SYMBOL(bch_btree_sort_lazy);
 
 void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
 {
-	unsigned i;
+	unsigned int i;
 
	for (i = 0; i <= b->nsets; i++) {
		struct bset_tree *t = &b->set[i];
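
For readers who want to see the in-order walk that the new comments for inorder_next()/inorder_prev() and bch_bset_build_written_tree() describe, the sketch below is a stand-alone user-space approximation, not part of the patch. The body of inorder_next() mirrors the kernel helper shown in the hunks above (with ffz() expressed via __builtin_ctz); the recursive checker, main(), and the chosen sizes are purely illustrative.

/*
 * Sketch: iterate the nodes of an implicit binary tree stored in a linear
 * array (node j has children 2*j and 2*j+1, like the bcache auxiliary
 * search tree) in in-order without recursion, and verify the order
 * against a plain recursive traversal.
 */
#include <assert.h>
#include <stdio.h>

static unsigned inorder_next(unsigned j, unsigned size)
{
	if (j * 2 + 1 < size) {
		/* descend into the right subtree, then keep going left */
		j = j * 2 + 1;
		while (j * 2 < size)
			j *= 2;
	} else {
		/* climb until we arrive from a left child: ffz(j) + 1 */
		j >>= __builtin_ctz(~j) + 1;
	}
	return j;
}

static void inorder_recurse(unsigned j, unsigned size,
			    unsigned *out, unsigned *n)
{
	if (j >= size)
		return;
	inorder_recurse(j * 2, size, out, n);
	out[(*n)++] = j;
	inorder_recurse(j * 2 + 1, size, out, n);
}

int main(void)
{
	for (unsigned size = 2; size < 1024; size++) {
		unsigned expected[1024], n = 0, j, i = 0;

		inorder_recurse(1, size, expected, &n);

		/* inorder_next(0, size) yields the leftmost node */
		for (j = inorder_next(0, size); j; j = inorder_next(j, size))
			assert(j == expected[i++]);
		assert(i == n);
	}
	printf("iterative in-order walk matches recursive traversal\n");
	return 0;
}

bch_bset_build_written_tree() uses exactly this kind of walk to visit one tree node per cacheline of keys, so the whole auxiliary tree can be filled in a single linear pass over the sorted bset.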

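The iterator rework above (struct btree_iter_stack, heap_pop(), heap_sift(), bch_btree_iter_push()) and btree_mergesort() are built on a small min-heap that merges the several already-sorted bsets of a btree node. The following stand-alone sketch shows that k-way merge idea only; every name in it (merge_iter, cursor, iter_push, iter_next, sift_down) is made up for illustration and does not exist in the kernel sources.

/*
 * Sketch: merge several sorted integer arrays with a binary min-heap of
 * cursors, analogous to how a btree iterator keeps one position per bset
 * and always yields the smallest current key.
 */
#include <stdio.h>
#include <stddef.h>

struct cursor {
	const int *k;	/* next element of this sorted set */
	const int *end;	/* one past the last element */
};

struct merge_iter {
	size_t used;
	struct cursor data[8];
};

static void sift_down(struct merge_iter *it, size_t i)
{
	for (;;) {
		size_t l = 2 * i + 1, r = l + 1, min = i;

		if (l < it->used && *it->data[l].k < *it->data[min].k)
			min = l;
		if (r < it->used && *it->data[r].k < *it->data[min].k)
			min = r;
		if (min == i)
			break;
		struct cursor tmp = it->data[i];
		it->data[i] = it->data[min];
		it->data[min] = tmp;
		i = min;
	}
}

static void iter_push(struct merge_iter *it, const int *k, const int *end)
{
	if (k == end)
		return;
	it->data[it->used].k = k;
	it->data[it->used].end = end;
	/* sift the new cursor up towards the root */
	for (size_t i = it->used++;
	     i && *it->data[i].k < *it->data[(i - 1) / 2].k;
	     i = (i - 1) / 2) {
		struct cursor tmp = it->data[i];
		it->data[i] = it->data[(i - 1) / 2];
		it->data[(i - 1) / 2] = tmp;
	}
}

static const int *iter_next(struct merge_iter *it)
{
	if (!it->used)
		return NULL;

	const int *ret = it->data[0].k++;

	if (it->data[0].k == it->data[0].end)	/* like heap_pop() */
		it->data[0] = it->data[--it->used];
	sift_down(it, 0);			/* like heap_sift() */
	return ret;
}

int main(void)
{
	const int a[] = { 1, 4, 9 }, b[] = { 2, 3, 10 }, c[] = { 5, 8 };
	struct merge_iter it = { .used = 0 };
	const int *k;

	iter_push(&it, a, a + 3);
	iter_push(&it, b, b + 3);
	iter_push(&it, c, c + 2);

	while ((k = iter_next(&it)))
		printf("%d ", *k);	/* prints: 1 2 3 4 5 8 9 10 */
	printf("\n");
	return 0;
}

In the kernel the heap elements are (bkey pointer, end) pairs per bset, and btree_mergesort() additionally merges or drops keys as it drains the heap; the change in this diff only moves the fixed-size element array into struct btree_iter_stack so on-stack iterators have a known size.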