Diffstat (limited to 'drivers/md/bcache/bset.h')
 -rw-r--r--  drivers/md/bcache/bset.h | 480
 1 file changed, 353 insertions, 127 deletions
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 57a9cff41546..6ee2c6a506a2 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,6 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "bcache_ondisk.h"
+#include "util.h" /* for time_stats */
+
 /*
  * BKEYS:
  *
@@ -140,14 +147,12 @@
  * first key in that range of bytes again.
  */
 
-/* Btree key comparison/iteration */
+struct btree_keys;
+struct btree_iter;
+struct btree_iter_set;
+struct bkey_float;
 
-struct btree_iter {
-	size_t size, used;
-	struct btree_iter_set {
-		struct bkey *k, *end;
-	} data[MAX_BSETS];
-};
+#define MAX_BSETS		4U
 
 struct bset_tree {
 	/*
@@ -158,14 +163,14 @@ struct bset_tree {
 	 */
 
 	/* size of the binary tree and prev array */
-	unsigned	size;
+	unsigned int		size;
 
 	/* function of size - precalculated for to_inorder() */
-	unsigned	extra;
+	unsigned int		extra;
 
 	/* copy of the last key in the set */
-	struct bkey	end;
-	struct bkey_float *tree;
+	struct bkey		end;
+	struct bkey_float	*tree;
 
 	/*
 	 * The nodes in the bset tree point to specific keys - this
@@ -175,100 +180,260 @@ struct bset_tree {
 	 * to keep bkey_float to 4 bytes and prev isn't used in the fast
 	 * path.
 	 */
-	uint8_t	*prev;
+	uint8_t			*prev;
 
 	/* The actual btree node, with pointers to each sorted set */
-	struct bset	*data;
+	struct bset		*data;
 };
 
-static __always_inline int64_t bkey_cmp(const struct bkey *l,
-					const struct bkey *r)
+struct btree_keys_ops {
+	bool		(*sort_cmp)(struct btree_iter_set l,
+				    struct btree_iter_set r);
+	struct bkey	*(*sort_fixup)(struct btree_iter *iter,
+				       struct bkey *tmp);
+	bool		(*insert_fixup)(struct btree_keys *b,
+					struct bkey *insert,
+					struct btree_iter *iter,
+					struct bkey *replace_key);
+	bool		(*key_invalid)(struct btree_keys *bk,
+				       const struct bkey *k);
+	bool		(*key_bad)(struct btree_keys *bk,
+				   const struct bkey *k);
+	bool		(*key_merge)(struct btree_keys *bk,
+				     struct bkey *l, struct bkey *r);
+	void		(*key_to_text)(char *buf,
+				       size_t size,
+				       const struct bkey *k);
+	void		(*key_dump)(struct btree_keys *keys,
+				    const struct bkey *k);
+
+	/*
+	 * Only used for deciding whether to use START_KEY(k) or just the key
+	 * itself in a couple places
+	 */
+	bool		is_extents;
+};
+
+struct btree_keys {
+	const struct btree_keys_ops	*ops;
+	uint8_t			page_order;
+	uint8_t			nsets;
+	unsigned int		last_set_unwritten:1;
+	bool			*expensive_debug_checks;
+
+	/*
+	 * Sets of sorted keys - the real btree node - plus a binary search tree
+	 *
+	 * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
+	 * to the memory we have allocated for this btree node. Additionally,
+	 * set[0]->data points to the entire btree node as it exists on disk.
+	 */
+	struct bset_tree	set[MAX_BSETS];
+};
+
+static inline struct bset_tree *bset_tree_last(struct btree_keys *b)
 {
-	return unlikely(KEY_INODE(l) != KEY_INODE(r))
-		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
-		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
+	return b->set + b->nsets;
 }
 
-static inline size_t bkey_u64s(const struct bkey *k)
+static inline bool bset_written(struct btree_keys *b, struct bset_tree *t)
 {
-	BUG_ON(KEY_CSUM(k) > 1);
-	return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ? 1 : 0);
+	return t <= b->set + b->nsets - b->last_set_unwritten;
 }
 
-static inline size_t bkey_bytes(const struct bkey *k)
+static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
 {
-	return bkey_u64s(k) * sizeof(uint64_t);
+	return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
 }
 
-static inline void bkey_copy(struct bkey *dest, const struct bkey *src)
+static inline unsigned int bset_byte_offset(struct btree_keys *b,
+					    struct bset *i)
 {
-	memcpy(dest, src, bkey_bytes(src));
+	return ((size_t) i) - ((size_t) b->set->data);
 }
 
-static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
+static inline unsigned int bset_sector_offset(struct btree_keys *b,
+					      struct bset *i)
 {
-	if (!src)
-		src = &KEY(0, 0, 0);
+	return bset_byte_offset(b, i) >> 9;
+}
+
+#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
+#define set_bytes(i)		__set_bytes(i, i->keys)
+
+#define __set_blocks(i, k, block_bytes)				\
+	DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
+#define set_blocks(i, block_bytes)				\
+	__set_blocks(i, (i)->keys, block_bytes)
 
-	SET_KEY_INODE(dest, KEY_INODE(src));
-	SET_KEY_OFFSET(dest, KEY_OFFSET(src));
+static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
+{
+	struct bset_tree *t = bset_tree_last(b);
+
+	BUG_ON((PAGE_SIZE << b->page_order) <
+	       (bset_byte_offset(b, t->data) + set_bytes(t->data)));
+
+	if (!b->last_set_unwritten)
+		return 0;
+
+	return ((PAGE_SIZE << b->page_order) -
+		(bset_byte_offset(b, t->data) + set_bytes(t->data))) /
+		sizeof(u64);
 }
 
-static inline struct bkey *bkey_next(const struct bkey *k)
+static inline struct bset *bset_next_set(struct btree_keys *b,
+					 unsigned int block_bytes)
 {
-	uint64_t *d = (void *) k;
-	return (struct bkey *) (d + bkey_u64s(k));
+	struct bset *i = bset_tree_last(b)->data;
+
+	return ((void *) i) + roundup(set_bytes(i), block_bytes);
 }
 
-/* Keylists */
+void bch_btree_keys_free(struct btree_keys *b);
+int bch_btree_keys_alloc(struct btree_keys *b, unsigned int page_order,
+			 gfp_t gfp);
+void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
+			 bool *expensive_debug_checks);
+
+void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic);
+void bch_bset_build_written_tree(struct btree_keys *b);
+void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k);
+bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r);
+void bch_bset_insert(struct btree_keys *b, struct bkey *where,
+		     struct bkey *insert);
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+				  struct bkey *replace_key);
+
+enum {
+	BTREE_INSERT_STATUS_NO_INSERT = 0,
+	BTREE_INSERT_STATUS_INSERT,
+	BTREE_INSERT_STATUS_BACK_MERGE,
+	BTREE_INSERT_STATUS_OVERWROTE,
+	BTREE_INSERT_STATUS_FRONT_MERGE,
+};
 
-struct keylist {
-	struct bkey		*top;
-	union {
-		uint64_t	*list;
-		struct bkey	*bottom;
-	};
+/* Btree key iteration */
 
-	/* Enough room for btree_split's keys without realloc */
-#define KEYLIST_INLINE		16
-	uint64_t		d[KEYLIST_INLINE];
+struct btree_iter {
+	size_t size, used;
+#ifdef CONFIG_BCACHE_DEBUG
+	struct btree_keys *b;
+#endif
+	struct btree_iter_set {
+		struct bkey *k, *end;
+	} data[];
 };
 
-static inline void bch_keylist_init(struct keylist *l)
+/* Fixed-size btree_iter that can be allocated on the stack */
+
+struct btree_iter_stack {
+	/* Must be last as it ends in a flexible-array member. */
+	TRAILING_OVERLAP(struct btree_iter, iter, data,
+		struct btree_iter_set stack_data[MAX_BSETS];
+	);
+};
+static_assert(offsetof(struct btree_iter_stack, iter.data) ==
+	      offsetof(struct btree_iter_stack, stack_data));
+
+typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
+
+struct bkey *bch_btree_iter_next(struct btree_iter *iter);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
+					struct btree_keys *b,
+					ptr_filter_fn fn);
+
+void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
+			 struct bkey *end);
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+				       struct btree_iter_stack *iter,
+				       struct bkey *search);
+
+struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
+			       const struct bkey *search);
+
+/*
+ * Returns the first key that is strictly greater than search
+ */
+static inline struct bkey *bch_bset_search(struct btree_keys *b,
+					   struct bset_tree *t,
+					   const struct bkey *search)
 {
-	l->top = (void *) (l->list = l->d);
+	return search ? __bch_bset_search(b, t, search) : t->data->start;
 }
 
-static inline void bch_keylist_push(struct keylist *l)
+#define for_each_key_filter(b, k, stack_iter, filter)			\
+	for (bch_btree_iter_stack_init((b), (stack_iter), NULL);	\
+	     ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
+					       filter));)
+
+#define for_each_key(b, k, stack_iter)					\
+	for (bch_btree_iter_stack_init((b), (stack_iter), NULL);	\
+	     ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
+
+/* Sorting */
+
+struct bset_sort_state {
+	mempool_t		pool;
+
+	unsigned int		page_order;
+	unsigned int		crit_factor;
+
+	struct time_stats	time;
+};
+
+void bch_bset_sort_state_free(struct bset_sort_state *state);
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+			     unsigned int page_order);
+void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state);
+void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
+			 struct bset_sort_state *state);
+void bch_btree_sort_and_fix_extents(struct btree_keys *b,
+				    struct btree_iter *iter,
+				    struct bset_sort_state *state);
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
+			    struct bset_sort_state *state);
+
+static inline void bch_btree_sort(struct btree_keys *b,
+				  struct bset_sort_state *state)
 {
-	l->top = bkey_next(l->top);
+	bch_btree_sort_partial(b, 0, state);
 }
 
-static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
+struct bset_stats {
+	size_t sets_written, sets_unwritten;
+	size_t bytes_written, bytes_unwritten;
+	size_t floats, failed;
+};
+
+void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state);
+
+/* Bkey utility code */
+
+#define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d,	\
+					 (unsigned int)(i)->keys)
+
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
 {
-	bkey_copy(l->top, k);
-	bch_keylist_push(l);
+	return bkey_idx(i->start, idx);
 }
 
-static inline bool bch_keylist_empty(struct keylist *l)
+static inline void bkey_init(struct bkey *k)
 {
-	return l->top == (void *) l->list;
+	*k = ZERO_KEY;
 }
 
-static inline void bch_keylist_free(struct keylist *l)
+static __always_inline int64_t bkey_cmp(const struct bkey *l,
+					const struct bkey *r)
 {
-	if (l->list != l->d)
-		kfree(l->list);
+	return unlikely(KEY_INODE(l) != KEY_INODE(r))
+		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
+		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
 }
 
-void bch_keylist_copy(struct keylist *, struct keylist *);
-struct bkey *bch_keylist_pop(struct keylist *);
-int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
-
-void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
-			      unsigned);
-bool __bch_cut_front(const struct bkey *, struct bkey *);
-bool __bch_cut_back(const struct bkey *, struct bkey *);
+void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
+			      unsigned int i);
+bool __bch_cut_front(const struct bkey *where, struct bkey *k);
+bool __bch_cut_back(const struct bkey *where, struct bkey *k);
 
 static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
 {
@@ -282,98 +447,159 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
 	return __bch_cut_back(where, k);
 }
 
-const char *bch_ptr_status(struct cache_set *, const struct bkey *);
-bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *);
-bool bch_ptr_bad(struct btree *, const struct bkey *);
+/*
+ * Pointer '*preceding_key_p' points to a memory object to store preceding
+ * key of k. If the preceding key does not exist, set '*preceding_key_p' to
+ * NULL. So the caller of preceding_key() needs to take care of memory
+ * which '*preceding_key_p' pointed to before calling preceding_key().
+ * Currently the only caller of preceding_key() is bch_btree_insert_key(),
+ * and it points to an on-stack variable, so the memory release is handled
+ * by stackframe itself.
+ */
+static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p)
+{
+	if (KEY_INODE(k) || KEY_OFFSET(k)) {
+		(**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
+		if (!(*preceding_key_p)->low)
+			(*preceding_key_p)->high--;
+		(*preceding_key_p)->low--;
+	} else {
+		(*preceding_key_p) = NULL;
+	}
+}
 
-static inline uint8_t gen_after(uint8_t a, uint8_t b)
+static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
 {
-	uint8_t r = a - b;
-	return r > 128U ? 0 : r;
+	return b->ops->key_invalid(b, k);
 }
 
-static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
-				unsigned i)
+static inline bool bch_ptr_bad(struct btree_keys *b, const struct bkey *k)
 {
-	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
+	return b->ops->key_bad(b, k);
 }
 
-static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
-				 unsigned i)
+static inline void bch_bkey_to_text(struct btree_keys *b, char *buf,
+				    size_t size, const struct bkey *k)
 {
-	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+	return b->ops->key_to_text(buf, size, k);
 }
 
+static inline bool bch_bkey_equal_header(const struct bkey *l,
+					 const struct bkey *r)
+{
+	return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
+		KEY_PTRS(l) == KEY_PTRS(r) &&
+		KEY_CSUM(l) == KEY_CSUM(r));
+}
 
-typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
+/* Keylists */
 
-struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
-struct bkey *bch_btree_iter_next(struct btree_iter *);
-struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
-					struct btree *, ptr_filter_fn);
+struct keylist {
+	union {
+		struct bkey		*keys;
+		uint64_t		*keys_p;
+	};
+	union {
+		struct bkey		*top;
+		uint64_t		*top_p;
+	};
 
-void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
-struct bkey *__bch_btree_iter_init(struct btree *, struct btree_iter *,
-				   struct bkey *, struct bset_tree *);
+	/* Enough room for btree_split's keys without realloc */
+#define KEYLIST_INLINE		16
+	uint64_t		inline_keys[KEYLIST_INLINE];
+};
 
-/* 32 bits total: */
-#define BKEY_MID_BITS		3
-#define BKEY_EXPONENT_BITS	7
-#define BKEY_MANTISSA_BITS	22
-#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+static inline void bch_keylist_init(struct keylist *l)
+{
+	l->top_p = l->keys_p = l->inline_keys;
+}
 
-struct bkey_float {
-	unsigned	exponent:BKEY_EXPONENT_BITS;
-	unsigned	m:BKEY_MID_BITS;
-	unsigned	mantissa:BKEY_MANTISSA_BITS;
-} __packed;
+static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
+{
	l->keys = k;
+	l->top = bkey_next(k);
+}
 
-/*
- * BSET_CACHELINE was originally intended to match the hardware cacheline size -
- * it used to be 64, but I realized the lookup code would touch slightly less
- * memory if it was 128.
- *
- * It definites the number of bytes (in struct bset) per struct bkey_float in
- * the auxiliar search tree - when we're done searching the bset_float tree we
- * have this many bytes left that we do a linear search over.
- *
- * Since (after level 5) every level of the bset_tree is on a new cacheline,
- * we're touching one fewer cacheline in the bset tree in exchange for one more
- * cacheline in the linear search - but the linear search might stop before it
- * gets to the second cacheline.
- */
+static inline void bch_keylist_push(struct keylist *l)
+{
+	l->top = bkey_next(l->top);
+}
 
-#define BSET_CACHELINE		128
-#define bset_tree_space(b)	(btree_data_space(b) / BSET_CACHELINE)
+static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
+{
+	bkey_copy(l->top, k);
+	bch_keylist_push(l);
+}
 
-#define bset_tree_bytes(b)	(bset_tree_space(b) * sizeof(struct bkey_float))
-#define bset_prev_bytes(b)	(bset_tree_space(b) * sizeof(uint8_t))
+static inline bool bch_keylist_empty(struct keylist *l)
+{
+	return l->top == l->keys;
+}
 
-void bch_bset_init_next(struct btree *);
+static inline void bch_keylist_reset(struct keylist *l)
+{
+	l->top = l->keys;
+}
 
-void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
-void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
+static inline void bch_keylist_free(struct keylist *l)
+{
+	if (l->keys_p != l->inline_keys)
+		kfree(l->keys_p);
+}
 
-struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
-			       const struct bkey *);
+static inline size_t bch_keylist_nkeys(struct keylist *l)
+{
+	return l->top_p - l->keys_p;
+}
 
-static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
-					   const struct bkey *search)
+static inline size_t bch_keylist_bytes(struct keylist *l)
 {
-	return search ? __bch_bset_search(b, t, search) : t->data->start;
+	return bch_keylist_nkeys(l) * sizeof(uint64_t);
 }
 
-bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
-void bch_btree_sort_lazy(struct btree *);
-void bch_btree_sort_into(struct btree *, struct btree *);
-void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
-void bch_btree_sort_partial(struct btree *, unsigned);
+struct bkey *bch_keylist_pop(struct keylist *l);
+void bch_keylist_pop_front(struct keylist *l);
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s);
+
+/* Debug stuff */
+
+#ifdef CONFIG_BCACHE_DEBUG
+
+int __bch_count_data(struct btree_keys *b);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *b,
+				     const char *fmt,
+				     ...);
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set);
+void bch_dump_bucket(struct btree_keys *b);
+
+#else
+
+static inline int __bch_count_data(struct btree_keys *b) { return -1; }
+static inline void __printf(2, 3)
+	__bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void bch_dump_bucket(struct btree_keys *b) {}
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set);
+
+#endif
+
+static inline bool btree_keys_expensive_checks(struct btree_keys *b)
+{
+#ifdef CONFIG_BCACHE_DEBUG
+	return *b->expensive_debug_checks;
+#else
+	return false;
+#endif
+}
 
-static inline void bch_btree_sort(struct btree *b)
+static inline int bch_count_data(struct btree_keys *b)
 {
-	bch_btree_sort_partial(b, 0);
+	return btree_keys_expensive_checks(b) ? __bch_count_data(b) : -1;
}
 
-int bch_bset_print_stats(struct cache_set *, char *);
+#define bch_check_keys(b, ...)						\
+do {									\
+	if (btree_keys_expensive_checks(b))				\
+		__bch_check_keys(b, __VA_ARGS__);			\
+} while (0)
 
 #endif
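For context on the new iteration API above: the old fixed-size struct btree_iter is replaced by a flexible-array iterator plus the stack-allocatable struct btree_iter_stack wrapper, consumed through the for_each_key()/for_each_key_filter() macros. A minimal sketch of a caller, assuming an already-initialized and populated node (the example_walk name is invented; everything else is declared in the header above):

	/* Sketch only: visit every non-bad key in a btree node. */
	static void example_walk(struct btree_keys *b)
	{
		struct btree_iter_stack iter;	/* fixed size, safe on the stack */
		struct bkey *k;

		/*
		 * Keys come back in sorted order, merged across all MAX_BSETS
		 * sets; bch_ptr_bad() dispatches to ops->key_bad(), and keys
		 * it returns true for are skipped.
		 */
		for_each_key_filter(b, k, &iter, bch_ptr_bad) {
			/* process k */
		}
	}

This matches the pattern used by in-tree callers such as btree.c; note the iterator needs no separate allocation or teardown.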

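On the reworked keylists: struct keylist now exposes keys/keys_p and top/top_p as union views over the same storage, with small sets held in inline_keys[] until __bch_keylist_realloc() spills to the heap. A hypothetical add-then-drain sketch (example_keylist and src are invented names; error handling elided):

	/* Sketch only: queue up keys in a keylist, then drain it. */
	static void example_keylist(struct bkey *src)
	{
		struct keylist keys;
		struct bkey *k;

		bch_keylist_init(&keys);	/* top == keys == inline_keys */
		bch_keylist_add(&keys, src);	/* bkey_copy() into top, then push */

		/* bch_keylist_pop() returns NULL once the list is empty */
		while ((k = bch_keylist_pop(&keys)))
			;	/* consume k */

		bch_keylist_free(&keys);	/* kfree() only if it spilled past inline_keys */
	}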