diff options
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r-- | drivers/md/bcache/Kconfig | 1 | ||||
-rw-r--r-- | drivers/md/bcache/alloc.c | 78 | ||||
-rw-r--r-- | drivers/md/bcache/bcache.h | 7 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 76 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 14 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 36 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 45 | ||||
-rw-r--r-- | drivers/md/bcache/movinggc.c | 35 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 16 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 164 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 10 | ||||
-rw-r--r-- | drivers/md/bcache/util.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/util.h | 67 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 5 |
14 files changed, 290 insertions, 266 deletions
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index b2d10063d35f..d4697e79d5a3 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -5,6 +5,7 @@ config BCACHE select BLOCK_HOLDER_DEPRECATED if SYSFS select CRC64 select CLOSURES + select MIN_HEAP help Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ce13c272c387..8998e61efa40 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -129,12 +129,9 @@ static inline bool can_inc_bucket_gen(struct bucket *b) bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b) { - BUG_ON(!ca->set->gc_mark_valid); - - return (!GC_MARK(b) || - GC_MARK(b) == GC_MARK_RECLAIMABLE) && - !atomic_read(&b->pin) && - can_inc_bucket_gen(b); + return (ca->set->gc_mark_valid || b->reclaimable_in_gc) && + ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) && + !atomic_read(&b->pin) && can_inc_bucket_gen(b)); } void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) @@ -148,6 +145,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) bch_inc_gen(ca, b); b->prio = INITIAL_PRIO; atomic_inc(&b->pin); + b->reclaimable_in_gc = 0; } static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) @@ -166,40 +164,61 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) * prio is worth 1/8th of what INITIAL_PRIO is worth. */ -#define bucket_prio(b) \ -({ \ - unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ - \ - (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ -}) +static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) +{ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; -#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) -#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) + return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); +} + +static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) +{ + struct bucket **lhs = (struct bucket **)l; + struct bucket **rhs = (struct bucket **)r; + struct cache *ca = args; + + return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); +} + +static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) +{ + struct bucket **lhs = (struct bucket **)l; + struct bucket **rhs = (struct bucket **)r; + struct cache *ca = args; + + return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); +} static void invalidate_buckets_lru(struct cache *ca) { struct bucket *b; - ssize_t i; + const struct min_heap_callbacks bucket_max_cmp_callback = { + .less = new_bucket_max_cmp, + .swp = NULL, + }; + const struct min_heap_callbacks bucket_min_cmp_callback = { + .less = new_bucket_min_cmp, + .swp = NULL, + }; - ca->heap.used = 0; + ca->heap.nr = 0; for_each_bucket(b, ca) { if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!heap_full(&ca->heap)) - heap_add(&ca->heap, b, bucket_max_cmp); - else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { + if (!min_heap_full(&ca->heap)) + min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); + else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_max_cmp); + min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); } } - for (i = ca->heap.used / 2 - 1; i >= 0; --i) - heap_sift(&ca->heap, i, bucket_min_cmp); + min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); while (!fifo_full(&ca->free_inc)) { - if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { + if (!ca->heap.nr) { /* * We don't want to be calling invalidate_buckets() * multiple times when it can't do anything @@ -208,6 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } @@ -352,8 +373,7 @@ static int bch_allocator_thread(void *arg) */ retry_invalidate: - allocator_wait(ca, ca->set->gc_mark_valid && - !ca->invalidate_needs_gc); + allocator_wait(ca, !ca->invalidate_needs_gc); invalidate_buckets(ca); /* @@ -501,8 +521,8 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, ca = c->cache; b = bch_bucket_alloc(ca, reserve, wait); - if (b == -1) - goto err; + if (b < 0) + return -1; k->ptr[0] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), @@ -511,10 +531,6 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, SET_KEY_PTRS(k, 1); return 0; -err: - bch_bucket_free(c, k); - bkey_put(c, k); - return -1; } int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6ae2329052c9..785b0d9008fa 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -200,6 +200,7 @@ struct bucket { uint8_t gen; uint8_t last_gc; /* Most out of date gen in the btree */ uint16_t gc_mark; /* Bitfield used by GC. See below for field */ + uint16_t reclaimable_in_gc:1; }; /* @@ -300,7 +301,7 @@ struct cached_dev { struct list_head list; struct bcache_device disk; struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct cache_sb sb; struct cache_sb_disk *sb_disk; @@ -423,7 +424,7 @@ struct cache { struct kobject kobj; struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct task_struct *alloc_thread; @@ -457,7 +458,7 @@ struct cache { /* Allocation stuff: */ struct bucket *buckets; - DECLARE_HEAP(struct bucket *, heap); + DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 2bba4d6aaaa2..68258a16e125 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -57,6 +57,8 @@ int __bch_count_data(struct btree_keys *b) struct btree_iter iter; struct bkey *k; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + if (b->ops->is_extents) for_each_key(b, k, &iter) ret += KEY_SIZE(k); @@ -70,6 +72,8 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) struct btree_iter iter; const char *err; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key(b, k, &iter) { if (b->ops->is_extents) { err = "Keys out of order"; @@ -110,9 +114,9 @@ bug: static void bch_btree_iter_next_check(struct btree_iter *iter) { - struct bkey *k = iter->data->k, *next = bkey_next(k); + struct bkey *k = iter->heap.data->k, *next = bkey_next(k); - if (next < iter->data->end && + if (next < iter->heap.data->end && bkey_cmp(k, iter->b->ops->is_extents ? &START_KEY(next) : next) > 0) { bch_dump_bucket(iter->b); @@ -885,6 +889,8 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* * If k has preceding key, preceding_key_p will be set to address * of k's preceding key; otherwise preceding_key_p will be set @@ -1077,27 +1083,34 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, /* Btree iterator */ -typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, - struct btree_iter_set); +typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); -static inline bool btree_iter_cmp(struct btree_iter_set l, - struct btree_iter_set r) +static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args) { - return bkey_cmp(l.k, r.k) > 0; + const struct btree_iter_set *_l = l; + const struct btree_iter_set *_r = r; + + return bkey_cmp(_l->k, _r->k) <= 0; } static inline bool btree_iter_end(struct btree_iter *iter) { - return !iter->used; + return !iter->heap.nr; } void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end) { + const struct min_heap_callbacks callbacks = { + .less = new_btree_iter_cmp, + .swp = NULL, + }; + if (k != end) - BUG_ON(!heap_add(iter, - ((struct btree_iter_set) { k, end }), - btree_iter_cmp)); + BUG_ON(!min_heap_push(&iter->heap, + &((struct btree_iter_set) { k, end }), + &callbacks, + NULL)); } static struct bkey *__bch_btree_iter_init(struct btree_keys *b, @@ -1107,8 +1120,8 @@ static struct bkey *__bch_btree_iter_init(struct btree_keys *b, { struct bkey *ret = NULL; - iter->size = ARRAY_SIZE(iter->data); - iter->used = 0; + iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); + iter->heap.nr = 0; #ifdef CONFIG_BCACHE_DEBUG iter->b = b; @@ -1130,26 +1143,34 @@ struct bkey *bch_btree_iter_init(struct btree_keys *b, } static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, - btree_iter_cmp_fn *cmp) + new_btree_iter_cmp_fn *cmp) { struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = cmp, + .swp = NULL, + }; if (!btree_iter_end(iter)) { bch_btree_iter_next_check(iter); - ret = iter->data->k; - iter->data->k = bkey_next(iter->data->k); + ret = iter->heap.data->k; + iter->heap.data->k = bkey_next(iter->heap.data->k); - if (iter->data->k > iter->data->end) { + if (iter->heap.data->k > iter->heap.data->end) { WARN_ONCE(1, "bset was corrupt!\n"); - iter->data->k = iter->data->end; + iter->heap.data->k = iter->heap.data->end; } - if (iter->data->k == iter->data->end) - heap_pop(iter, b, cmp); + if (iter->heap.data->k == iter->heap.data->end) { + if (iter->heap.nr) { + b = min_heap_peek(&iter->heap)[0]; + min_heap_pop(&iter->heap, &callbacks, NULL); + } + } else - heap_sift(iter, 0, cmp); + min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); } return ret; @@ -1157,7 +1178,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, struct bkey *bch_btree_iter_next(struct btree_iter *iter) { - return __bch_btree_iter_next(iter, btree_iter_cmp); + return __bch_btree_iter_next(iter, new_btree_iter_cmp); } @@ -1195,16 +1216,18 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { - int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? bch_ptr_bad : bch_ptr_invalid; + const struct min_heap_callbacks callbacks = { + .less = b->ops->sort_cmp, + .swp = NULL, + }; /* Heapify the iterator, using our comparison function */ - for (i = iter->used / 2 - 1; i >= 0; --i) - heap_sift(iter, i, b->ops->sort_cmp); + min_heapify_all(&iter->heap, &callbacks, NULL); while (!btree_iter_end(iter)) { if (b->ops->sort_fixup && fixup) @@ -1296,6 +1319,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct btree_iter iter; int oldsize = bch_count_data(b); + min_heap_init(&iter.heap, NULL, MAX_BSETS); __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); if (start) { @@ -1325,6 +1349,8 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, uint64_t start_time = local_clock(); struct btree_iter iter; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(b, &iter, NULL); btree_mergesort(b, new->set->data, &iter, false, true); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index d795c84246b0..f79441acd4c1 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -187,8 +187,9 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(struct btree_iter_set l, - struct btree_iter_set r); + bool (*sort_cmp)(const void *l, + const void *r, + void *args); struct bkey *(*sort_fixup)(struct btree_iter *iter, struct bkey *tmp); bool (*insert_fixup)(struct btree_keys *b, @@ -312,16 +313,17 @@ enum { BTREE_INSERT_STATUS_FRONT_MERGE, }; +struct btree_iter_set { + struct bkey *k, *end; +}; + /* Btree key iteration */ struct btree_iter { - size_t size, used; #ifdef CONFIG_BCACHE_DEBUG struct btree_keys *b; #endif - struct btree_iter_set { - struct bkey *k, *end; - } data[MAX_BSETS]; + MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; }; typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 196cdacce38f..ed40d8600656 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -149,19 +149,19 @@ void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = btree_bset_first(b); - struct btree_iter *iter; + struct btree_iter iter; /* * c->fill_iter can allocate an iterator with more memory space * than static MAX_BSETS. * See the comment arount cache_set->fill_iter. */ - iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; - iter->used = 0; + iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); + iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter.heap.nr = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->b = &b->keys; + iter.b = &b->keys; #endif if (!i->seq) @@ -199,7 +199,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->keys.set[0].data && !i->keys) goto err; - bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); + bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, block_bytes(b->c->cache)); } @@ -211,7 +211,7 @@ void bch_btree_node_read_done(struct btree *b) if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); i = b->keys.set[0].data; err = "short btree key"; @@ -223,7 +223,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->cache->sb)); out: - mempool_free(iter, &b->c->fill_iter); + mempool_free(iter.heap.data, &b->c->fill_iter); return; err: set_btree_node_io_error(b); @@ -1312,6 +1312,8 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) struct btree_iter iter; struct bset_tree *t; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + gc->nodes++; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { @@ -1573,6 +1575,8 @@ static unsigned int btree_gc_count_keys(struct btree *b) struct btree_iter iter; unsigned int ret = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1615,6 +1619,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct gc_merge_info r[GC_MERGE_NODES]; struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; + min_heap_init(&iter.heap, NULL, MAX_BSETS); bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); for (i = r; i < r + ARRAY_SIZE(r); i++) @@ -1740,18 +1745,20 @@ static void btree_gc_start(struct cache_set *c) mutex_lock(&c->bucket_lock); - c->gc_mark_valid = 0; c->gc_done = ZERO_KEY; ca = c->cache; for_each_bucket(b, ca) { b->last_gc = b->gen; + if (bch_can_invalidate_bucket(ca, b)) + b->reclaimable_in_gc = 1; if (!atomic_read(&b->pin)) { SET_GC_MARK(b, 0); SET_GC_SECTORS_USED(b, 0); } } + c->gc_mark_valid = 0; mutex_unlock(&c->bucket_lock); } @@ -1808,6 +1815,9 @@ static void bch_btree_gc_finish(struct cache_set *c) for_each_bucket(b, ca) { c->need_gc = max(c->need_gc, bucket_gc_gen(b)); + if (b->reclaimable_in_gc) + b->reclaimable_in_gc = 0; + if (atomic_read(&b->pin)) continue; @@ -1913,6 +1923,8 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) struct bkey *k, *p = NULL; struct btree_iter iter; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(b->c, b->level, k); @@ -1958,6 +1970,8 @@ static int bch_btree_check_thread(void *arg) cur_idx = prev_idx = 0; ret = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* root node keys are checked before thread created */ bch_btree_iter_init(&c->root->keys, &iter, NULL); k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); @@ -2054,6 +2068,8 @@ int bch_btree_check(struct cache_set *c) struct btree_iter iter; struct btree_check_state check_state; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(c, c->root->level, k); @@ -2549,6 +2565,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, struct bkey *k; struct btree_iter iter; + min_heap_init(&iter.heap, NULL, MAX_BSETS); bch_btree_iter_init(&b->keys, &iter, from); while ((k = bch_btree_iter_next_filter(&iter, &b->keys, @@ -2582,6 +2599,7 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, struct bkey *k; struct btree_iter iter; + min_heap_init(&iter.heap, NULL, MAX_BSETS); bch_btree_iter_init(&b->keys, &iter, from); while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index d626ffcbecb9..4b84fda1530a 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -33,15 +33,16 @@ static void sort_key_next(struct btree_iter *iter, i->k = bkey_next(i->k); if (i->k == i->end) - *i = iter->data[--iter->used]; + *i = iter->heap.data[--iter->heap.nr]; } -static bool bch_key_sort_cmp(struct btree_iter_set l, - struct btree_iter_set r) +static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) { - int64_t c = bkey_cmp(l.k, r.k); + struct btree_iter_set *_l = (struct btree_iter_set *)l; + struct btree_iter_set *_r = (struct btree_iter_set *)r; + int64_t c = bkey_cmp(_l->k, _r->k); - return c ? c > 0 : l.k < r.k; + return !(c ? c > 0 : _l->k < _r->k); } static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) @@ -238,7 +239,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, } const struct btree_keys_ops bch_btree_keys_ops = { - .sort_cmp = bch_key_sort_cmp, + .sort_cmp = new_bch_key_sort_cmp, .insert_fixup = bch_btree_ptr_insert_fixup, .key_invalid = bch_btree_ptr_invalid, .key_bad = bch_btree_ptr_bad, @@ -255,22 +256,28 @@ const struct btree_keys_ops bch_btree_keys_ops = { * Necessary for btree_sort_fixup() - if there are multiple keys that compare * equal in different sets, we have to process them newest to oldest. */ -static bool bch_extent_sort_cmp(struct btree_iter_set l, - struct btree_iter_set r) + +static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) { - int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); + struct btree_iter_set *_l = (struct btree_iter_set *)l; + struct btree_iter_set *_r = (struct btree_iter_set *)r; + int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); - return c ? c > 0 : l.k < r.k; + return !(c ? c > 0 : _l->k < _r->k); } static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, struct bkey *tmp) { - while (iter->used > 1) { - struct btree_iter_set *top = iter->data, *i = top + 1; - - if (iter->used > 2 && - bch_extent_sort_cmp(i[0], i[1])) + const struct min_heap_callbacks callbacks = { + .less = new_bch_extent_sort_cmp, + .swp = NULL, + }; + while (iter->heap.nr > 1) { + struct btree_iter_set *top = iter->heap.data, *i = top + 1; + + if (iter->heap.nr > 2 && + !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) i++; if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) @@ -278,7 +285,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, if (!KEY_SIZE(i->k)) { sort_key_next(iter, i); - heap_sift(iter, i - top, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); continue; } @@ -288,7 +295,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, else bch_cut_front(top->k, i->k); - heap_sift(iter, i - top, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); } else { /* can't happen because of comparison func */ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); @@ -298,7 +305,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, bch_cut_back(&START_KEY(i->k), tmp); bch_cut_front(i->k, top->k); - heap_sift(iter, 0, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); return tmp; } else { @@ -618,7 +625,7 @@ static bool bch_extent_merge(struct btree_keys *bk, } const struct btree_keys_ops bch_extent_keys_ops = { - .sort_cmp = bch_extent_sort_cmp, + .sort_cmp = new_bch_extent_sort_cmp, .sort_fixup = bch_extent_sort_fixup, .insert_fixup = bch_extent_insert_fixup, .key_invalid = bch_extent_invalid, diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index ebd500bdf0b2..45ca134cbf02 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -82,7 +82,7 @@ static void moving_init(struct moving_io *io) bio_init(bio, NULL, bio->bi_inline_vecs, DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0); bio_get(bio); - bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9; bio->bi_private = &io->cl; @@ -182,16 +182,19 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_sync(&cl); } -static bool bucket_cmp(struct bucket *l, struct bucket *r) +static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) { - return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); + struct bucket **_l = (struct bucket **)l; + struct bucket **_r = (struct bucket **)r; + + return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); } static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; - return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; + return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -199,6 +202,10 @@ void bch_moving_gc(struct cache_set *c) struct cache *ca = c->cache; struct bucket *b; unsigned long sectors_to_move, reserve_sectors; + const struct min_heap_callbacks callbacks = { + .less = new_bucket_cmp, + .swp = NULL, + }; if (!c->copy_gc_enabled) return; @@ -209,7 +216,7 @@ void bch_moving_gc(struct cache_set *c) reserve_sectors = ca->sb.bucket_size * fifo_used(&ca->free[RESERVE_MOVINGGC]); - ca->heap.used = 0; + ca->heap.nr = 0; for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || @@ -218,25 +225,31 @@ void bch_moving_gc(struct cache_set *c) atomic_read(&b->pin)) continue; - if (!heap_full(&ca->heap)) { + if (!min_heap_full(&ca->heap)) { sectors_to_move += GC_SECTORS_USED(b); - heap_add(&ca->heap, b, bucket_cmp); - } else if (bucket_cmp(b, heap_peek(&ca->heap))) { + min_heap_push(&ca->heap, &b, &callbacks, NULL); + } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_cmp); + min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); } } while (sectors_to_move > reserve_sectors) { - heap_pop(&ca->heap, b, bucket_cmp); + if (ca->heap.nr) { + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &callbacks, NULL); + } sectors_to_move -= GC_SECTORS_USED(b); } - while (heap_pop(&ca->heap, b, bucket_cmp)) + while (ca->heap.nr) { + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &callbacks, NULL); SET_GC_MOVE(b, 1); + } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 83d112bd2b1c..af345dc6fde1 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -369,10 +369,24 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) struct io *i; if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || - c->gc_stats.in_use > CUTOFF_CACHE_ADD || (bio_op(bio) == REQ_OP_DISCARD)) goto skip; + if (c->gc_stats.in_use > CUTOFF_CACHE_ADD) { + /* + * If cached buckets are all clean now, 'true' will be + * returned and all requests will bypass the cache device. + * Then c->sectors_to_gc has no chance to be negative, and + * gc thread won't wake up and caching won't work forever. + * Here call force_wake_up_gc() to avoid such aftermath. + */ + if (BDEV_STATE(&dc->sb) == BDEV_STATE_CLEAN && + c->gc_mark_valid) + force_wake_up_gc(c); + + goto skip; + } + if (mode == CACHE_MODE_NONE || (mode == CACHE_MODE_WRITEAROUND && op_is_write(bio_op(bio)))) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dc3f50f69714..e42f1400cea9 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -171,7 +171,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, struct page *page; unsigned int i; - page = read_cache_page_gfp(bdev->bd_inode->i_mapping, + page = read_cache_page_gfp(bdev->bd_mapping, SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL); if (IS_ERR(page)) return "IO error"; @@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d) bcache_device_detach(d); if (disk) { - ida_simple_remove(&bcache_device_idx, - first_minor_to_idx(disk->first_minor)); + ida_free(&bcache_device_idx, + first_minor_to_idx(disk->first_minor)); put_disk(disk); } @@ -897,12 +897,26 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, sector_t sectors, struct block_device *cached_bdev, const struct block_device_operations *ops) { - struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, SIZE_MAX / sizeof(atomic_t)); + struct queue_limits lim = { + .max_hw_sectors = UINT_MAX, + .max_sectors = UINT_MAX, + .max_segment_size = UINT_MAX, + .max_segments = BIO_MAX_VECS, + .max_hw_discard_sectors = UINT_MAX, + .io_min = block_size, + .logical_block_size = block_size, + .physical_block_size = block_size, + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, + }; uint64_t n; int idx; + if (cached_bdev) { + d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT; + lim.io_opt = umax(block_size, bdev_io_opt(cached_bdev)); + } if (!d->stripe_size) d->stripe_size = 1 << 31; else if (d->stripe_size < BCH_MIN_STRIPE_SZ) @@ -926,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, if (!d->full_dirty_stripes) goto out_free_stripe_sectors_dirty; - idx = ida_simple_get(&bcache_device_idx, 0, - BCACHE_DEVICE_IDX_MAX, GFP_KERNEL); + idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1, + GFP_KERNEL); if (idx < 0) goto out_free_full_dirty_stripes; @@ -935,52 +949,37 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) goto out_ida_remove; - d->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!d->disk) - goto out_bioset_exit; - - set_capacity(d->disk, sectors); - snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); - - d->disk->major = bcache_major; - d->disk->first_minor = idx_to_first_minor(idx); - d->disk->minors = BCACHE_MINORS; - d->disk->fops = ops; - d->disk->private_data = d; - - q = d->disk->queue; - q->limits.max_hw_sectors = UINT_MAX; - q->limits.max_sectors = UINT_MAX; - q->limits.max_segment_size = UINT_MAX; - q->limits.max_segments = BIO_MAX_VECS; - blk_queue_max_discard_sectors(q, UINT_MAX); - q->limits.io_min = block_size; - q->limits.logical_block_size = block_size; - q->limits.physical_block_size = block_size; - - if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) { + if (lim.logical_block_size > PAGE_SIZE && cached_bdev) { /* * This should only happen with BCACHE_SB_VERSION_BDEV. * Block/page size is checked for BCACHE_SB_VERSION_CDEV. */ - pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", - d->disk->disk_name, q->limits.logical_block_size, + pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", + idx, lim.logical_block_size, PAGE_SIZE, bdev_logical_block_size(cached_bdev)); /* This also adjusts physical block size/min io size if needed */ - blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev)); + lim.logical_block_size = bdev_logical_block_size(cached_bdev); } - blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); + d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(d->disk)) + goto out_bioset_exit; - blk_queue_write_cache(q, true, true); + set_capacity(d->disk, sectors); + snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); + d->disk->major = bcache_major; + d->disk->first_minor = idx_to_first_minor(idx); + d->disk->minors = BCACHE_MINORS; + d->disk->fops = ops; + d->disk->private_data = d; return 0; out_bioset_exit: bioset_exit(&d->bio_split); out_ida_remove: - ida_simple_remove(&bcache_device_idx, idx); + ida_free(&bcache_device_idx, idx); out_free_full_dirty_stripes: kvfree(d->full_dirty_stripes); out_free_stripe_sectors_dirty: @@ -1369,8 +1368,8 @@ static CLOSURE_CALLBACK(cached_dev_free) if (dc->sb_disk) put_page(virt_to_page(dc->sb_disk)); - if (dc->bdev_handle) - bdev_release(dc->bdev_handle); + if (dc->bdev_file) + fput(dc->bdev_file); wake_up(&unregister_wait); @@ -1416,11 +1415,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); } - dc->disk.stripe_size = q->limits.io_opt >> 9; - - if (dc->disk.stripe_size) - dc->partial_stripes_expensive = - q->limits.raid_partial_stripes_expensive; + if (bdev_io_opt(dc->bdev)) + dc->partial_stripes_expensive = !!(q->limits.features & + BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE); ret = bcache_device_init(&dc->disk, block_size, bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, @@ -1428,9 +1425,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) if (ret) return ret; - blk_queue_io_opt(dc->disk.disk->queue, - max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); - atomic_set(&dc->io_errors, 0); dc->io_disable = false; dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT; @@ -1445,7 +1439,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) /* Cached device - bcache superblock */ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, - struct bdev_handle *bdev_handle, + struct file *bdev_file, struct cached_dev *dc) { const char *err = "cannot allocate memory"; @@ -1453,8 +1447,8 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, int ret = -ENOMEM; memcpy(&dc->sb, sb, sizeof(struct cache_sb)); - dc->bdev_handle = bdev_handle; - dc->bdev = bdev_handle->bdev; + dc->bdev_file = bdev_file; + dc->bdev = file_bdev(bdev_file); dc->sb_disk = sb_disk; if (cached_dev_init(dc, sb->block_size << 9)) @@ -1724,7 +1718,7 @@ static CLOSURE_CALLBACK(cache_set_flush) if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); - if (!IS_ERR(c->root)) + if (!IS_ERR_OR_NULL(c->root)) list_add(&c->root->list, &c->btree_cache); /* @@ -1913,8 +1907,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) * - sizeof(struct btree_iter_set); + iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + sizeof(struct btree_iter_set); c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); if (!c->devices) @@ -2218,8 +2212,8 @@ void bch_cache_release(struct kobject *kobj) if (ca->sb_disk) put_page(virt_to_page(ca->sb_disk)); - if (ca->bdev_handle) - bdev_release(ca->bdev_handle); + if (ca->bdev_file) + fput(ca->bdev_file); kfree(ca); module_put(THIS_MODULE); @@ -2339,18 +2333,18 @@ err_free: } static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, - struct bdev_handle *bdev_handle, + struct file *bdev_file, struct cache *ca) { const char *err = NULL; /* must be set for any error case */ int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); - ca->bdev_handle = bdev_handle; - ca->bdev = bdev_handle->bdev; + ca->bdev_file = bdev_file; + ca->bdev = file_bdev(bdev_file); ca->sb_disk = sb_disk; - if (bdev_max_discard_sectors((bdev_handle->bdev))) + if (bdev_max_discard_sectors(file_bdev(bdev_file))) ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(ca); @@ -2361,20 +2355,20 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, err = "cache_alloc(): cache device is too small"; else err = "cache_alloc(): unknown error"; - pr_notice("error %pg: %s\n", bdev_handle->bdev, err); + pr_notice("error %pg: %s\n", file_bdev(bdev_file), err); /* * If we failed here, it means ca->kobj is not initialized yet, * kobject_put() won't be called and there is no chance to - * call bdev_release() to bdev in bch_cache_release(). So - * we explicitly call bdev_release() here. + * call fput() to bdev in bch_cache_release(). So + * we explicitly call fput() on the block device here. */ - bdev_release(bdev_handle); + fput(bdev_file); return ret; } - if (kobject_add(&ca->kobj, bdev_kobj(bdev_handle->bdev), "bcache")) { + if (kobject_add(&ca->kobj, bdev_kobj(file_bdev(bdev_file)), "bcache")) { pr_notice("error %pg: error calling kobject_add\n", - bdev_handle->bdev); + file_bdev(bdev_file)); ret = -ENOMEM; goto out; } @@ -2388,7 +2382,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto out; } - pr_info("registered cache device %pg\n", ca->bdev_handle->bdev); + pr_info("registered cache device %pg\n", file_bdev(ca->bdev_file)); out: kobject_put(&ca->kobj); @@ -2446,7 +2440,7 @@ struct async_reg_args { char *path; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct bdev_handle *bdev_handle; + struct file *bdev_file; void *holder; }; @@ -2457,7 +2451,7 @@ static void register_bdev_worker(struct work_struct *work) container_of(work, struct async_reg_args, reg_work.work); mutex_lock(&bch_register_lock); - if (register_bdev(args->sb, args->sb_disk, args->bdev_handle, + if (register_bdev(args->sb, args->sb_disk, args->bdev_file, args->holder) < 0) fail = true; mutex_unlock(&bch_register_lock); @@ -2478,7 +2472,7 @@ static void register_cache_worker(struct work_struct *work) container_of(work, struct async_reg_args, reg_work.work); /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(args->sb, args->sb_disk, args->bdev_handle, + if (register_cache(args->sb, args->sb_disk, args->bdev_file, args->holder)) fail = true; @@ -2516,7 +2510,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, char *path = NULL; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct bdev_handle *bdev_handle, *bdev_handle2; + struct file *bdev_file, *bdev_file2; void *holder = NULL; ssize_t ret; bool async_registration = false; @@ -2549,15 +2543,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - bdev_handle = bdev_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); - if (IS_ERR(bdev_handle)) + bdev_file = bdev_file_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(bdev_file)) goto out_free_sb; - err = "failed to set blocksize"; - if (set_blocksize(bdev_handle->bdev, 4096)) - goto out_blkdev_put; - - err = read_super(sb, bdev_handle->bdev, &sb_disk); + err = read_super(sb, file_bdev(bdev_file), &sb_disk); if (err) goto out_blkdev_put; @@ -2569,13 +2559,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, } /* Now reopen in exclusive mode with proper holder */ - bdev_handle2 = bdev_open_by_dev(bdev_handle->bdev->bd_dev, + bdev_file2 = bdev_file_open_by_dev(file_bdev(bdev_file)->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, holder, NULL); - bdev_release(bdev_handle); - bdev_handle = bdev_handle2; - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); - bdev_handle = NULL; + fput(bdev_file); + bdev_file = bdev_file2; + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); + bdev_file = NULL; if (ret == -EBUSY) { dev_t dev; @@ -2610,7 +2600,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, args->path = path; args->sb = sb; args->sb_disk = sb_disk; - args->bdev_handle = bdev_handle; + args->bdev_file = bdev_file; args->holder = holder; register_device_async(args); /* No wait and returns to user space */ @@ -2619,14 +2609,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (SB_IS_BDEV(sb)) { mutex_lock(&bch_register_lock); - ret = register_bdev(sb, sb_disk, bdev_handle, holder); + ret = register_bdev(sb, sb_disk, bdev_file, holder); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ if (ret < 0) goto out_free_sb; } else { /* blkdev_put() will be called in bch_cache_release() */ - ret = register_cache(sb, sb_disk, bdev_handle, holder); + ret = register_cache(sb, sb_disk, bdev_file, holder); if (ret) goto out_free_sb; } @@ -2642,8 +2632,8 @@ out_free_holder: out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - if (bdev_handle) - bdev_release(bdev_handle); + if (bdev_file) + fput(bdev_file); out_free_sb: kfree(sb); out_free_path: diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index a438efb66069..e8f696cb58c0 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -662,6 +662,8 @@ static unsigned int bch_root_usage(struct cache_set *c) struct btree *b; struct btree_iter iter; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + goto lock_root; do { @@ -702,13 +704,7 @@ static unsigned int bch_cache_max_chain(struct cache_set *c) for (h = c->bucket_hash; h < c->bucket_hash + (1 << BUCKET_HASH_BITS); h++) { - unsigned int i = 0; - struct hlist_node *p; - - hlist_for_each(p, h) - i++; - - ret = max(ret, i); + ret = max(ret, hlist_count_nodes(h)); } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index ae380bc3992e..410d8cb49e50 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * random utiility code, for bcache but in theory not specific to bcache + * random utility code, for bcache but in theory not specific to bcache * * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> * Copyright 2012 Google, Inc. diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index f61ab1bada6c..539454d8e2d0 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/sched/clock.h> #include <linux/llist.h> +#include <linux/min_heap.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -30,16 +31,10 @@ struct closure; #endif -#define DECLARE_HEAP(type, name) \ - struct { \ - size_t size, used; \ - type *data; \ - } name - #define init_heap(heap, _size, gfp) \ ({ \ size_t _bytes; \ - (heap)->used = 0; \ + (heap)->nr = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ @@ -52,64 +47,6 @@ do { \ (heap)->data = NULL; \ } while (0) -#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) - -#define heap_sift(h, i, cmp) \ -do { \ - size_t _r, _j = i; \ - \ - for (; _j * 2 + 1 < (h)->used; _j = _r) { \ - _r = _j * 2 + 1; \ - if (_r + 1 < (h)->used && \ - cmp((h)->data[_r], (h)->data[_r + 1])) \ - _r++; \ - \ - if (cmp((h)->data[_r], (h)->data[_j])) \ - break; \ - heap_swap(h, _r, _j); \ - } \ -} while (0) - -#define heap_sift_down(h, i, cmp) \ -do { \ - while (i) { \ - size_t p = (i - 1) / 2; \ - if (cmp((h)->data[i], (h)->data[p])) \ - break; \ - heap_swap(h, i, p); \ - i = p; \ - } \ -} while (0) - -#define heap_add(h, d, cmp) \ -({ \ - bool _r = !heap_full(h); \ - if (_r) { \ - size_t _i = (h)->used++; \ - (h)->data[_i] = d; \ - \ - heap_sift_down(h, _i, cmp); \ - heap_sift(h, _i, cmp); \ - } \ - _r; \ -}) - -#define heap_pop(h, d, cmp) \ -({ \ - bool _r = (h)->used; \ - if (_r) { \ - (d) = (h)->data[0]; \ - (h)->used--; \ - heap_swap(h, 0, (h)->used); \ - heap_sift(h, 0, cmp); \ - } \ - _r; \ -}) - -#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) - -#define heap_full(h) ((h)->used == (h)->size) - #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 8827a6f130ad..453efbbdc8ee 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -334,7 +334,7 @@ static void dirty_init(struct keybuf_key *w) bio_init(bio, NULL, bio->bi_inline_vecs, DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0); if (!io->dc->writeback_percent) - bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9; bio->bi_private = w; @@ -915,6 +915,7 @@ static int bch_dirty_init_thread(void *arg) k = p = NULL; prev_idx = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); bch_btree_iter_init(&c->root->keys, &iter, NULL); k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); @@ -984,6 +985,8 @@ void bch_sectors_dirty_init(struct bcache_device *d) struct cache_set *c = d->c; struct bch_dirty_init_state state; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + retry_lock: b = c->root; rw_lock(0, b, b->level); |