summaryrefslogtreecommitdiff
path: root/drivers/md/bcache
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r--drivers/md/bcache/alloc.c21
-rw-r--r--drivers/md/bcache/bcache.h1
-rw-r--r--drivers/md/bcache/bset.c44
-rw-r--r--drivers/md/bcache/bset.h28
-rw-r--r--drivers/md/bcache/btree.c52
-rw-r--r--drivers/md/bcache/movinggc.c2
-rw-r--r--drivers/md/bcache/request.c16
-rw-r--r--drivers/md/bcache/stats.c4
-rw-r--r--drivers/md/bcache/super.c97
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/bcache/util.c2
-rw-r--r--drivers/md/bcache/writeback.c12
12 files changed, 164 insertions, 117 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ce13c272c387..48ce750bf70a 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -129,12 +129,9 @@ static inline bool can_inc_bucket_gen(struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
- BUG_ON(!ca->set->gc_mark_valid);
-
- return (!GC_MARK(b) ||
- GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
- !atomic_read(&b->pin) &&
- can_inc_bucket_gen(b);
+ return (ca->set->gc_mark_valid || b->reclaimable_in_gc) &&
+ ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
+ !atomic_read(&b->pin) && can_inc_bucket_gen(b));
}
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -148,6 +145,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO;
atomic_inc(&b->pin);
+ b->reclaimable_in_gc = 0;
}
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -352,8 +350,7 @@ static int bch_allocator_thread(void *arg)
*/
retry_invalidate:
- allocator_wait(ca, ca->set->gc_mark_valid &&
- !ca->invalidate_needs_gc);
+ allocator_wait(ca, !ca->invalidate_needs_gc);
invalidate_buckets(ca);
/*
@@ -501,8 +498,8 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
ca = c->cache;
b = bch_bucket_alloc(ca, reserve, wait);
- if (b == -1)
- goto err;
+ if (b < 0)
+ return -1;
k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
bucket_to_sector(c, b),
@@ -511,10 +508,6 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
SET_KEY_PTRS(k, 1);
return 0;
-err:
- bch_bucket_free(c, k);
- bkey_put(c, k);
- return -1;
}
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 4e6afa89921f..1d33e40d26ea 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -200,6 +200,7 @@ struct bucket {
uint8_t gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
+ uint16_t reclaimable_in_gc:1;
};
/*
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 2bba4d6aaaa2..463eb13bd0b2 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -54,7 +54,7 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
unsigned int ret = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k;
if (b->ops->is_extents)
@@ -67,7 +67,7 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
const char *err;
for_each_key(b, k, &iter) {
@@ -879,7 +879,7 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey preceding_key_on_stack = ZERO_KEY;
struct bkey *preceding_key_p = &preceding_key_on_stack;
@@ -895,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
else
preceding_key(k, &preceding_key_p);
- m = bch_btree_iter_init(b, &iter, preceding_key_p);
+ m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
- if (b->ops->insert_fixup(b, k, &iter, replace_key))
+ if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
return status;
status = BTREE_INSERT_STATUS_INSERT;
@@ -1100,33 +1100,33 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
btree_iter_cmp));
}
-static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search,
- struct bset_tree *start)
+static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search,
+ struct bset_tree *start)
{
struct bkey *ret = NULL;
- iter->size = ARRAY_SIZE(iter->data);
- iter->used = 0;
+ iter->iter.size = ARRAY_SIZE(iter->stack_data);
+ iter->iter.used = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter->b = b;
+ iter->iter.b = b;
#endif
for (; start <= bset_tree_last(b); start++) {
ret = bch_bset_search(b, start, search);
- bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
+ bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
}
return ret;
}
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
struct bkey *search)
{
- return __bch_btree_iter_init(b, iter, search, b->set);
+ return __bch_btree_iter_stack_init(b, iter, search, b->set);
}
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
@@ -1293,10 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
int oldsize = bch_count_data(b);
- __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
+ __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
if (start) {
unsigned int i;
@@ -1307,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
order = get_order(__set_bytes(b->set->data, keys));
}
- __btree_sort(b, &iter, start, order, false, state);
+ __btree_sort(b, &iter.iter, start, order, false, state);
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
@@ -1323,11 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(b, &iter, NULL);
+ bch_btree_iter_stack_init(b, &iter, NULL);
- btree_mergesort(b, new->set->data, &iter, false, true);
+ btree_mergesort(b, new->set->data, &iter.iter, false, true);
bch_time_stats_update(&state->time, start_time);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index d795c84246b0..011f6062c4c0 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -321,7 +321,14 @@ struct btree_iter {
#endif
struct btree_iter_set {
struct bkey *k, *end;
- } data[MAX_BSETS];
+ } data[];
+};
+
+/* Fixed-size btree_iter that can be allocated on the stack */
+
+struct btree_iter_stack {
+ struct btree_iter iter;
+ struct btree_iter_set stack_data[MAX_BSETS];
};
typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
@@ -333,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end);
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search);
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search);
struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
const struct bkey *search);
@@ -350,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
-#define for_each_key_filter(b, k, iter, filter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
+#define for_each_key_filter(b, k, stack_iter, filter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
+ filter));)
-#define for_each_key(b, k, iter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next(iter));)
+#define for_each_key(b, k, stack_iter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
/* Sorting */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 196cdacce38f..210b59007d98 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -36,6 +36,7 @@
#include <linux/sched/clock.h>
#include <linux/rculist.h>
#include <linux/delay.h>
+#include <linux/sort.h>
#include <trace/events/bcache.h>
/*
@@ -88,8 +89,6 @@
* Test module load/unload
*/
-#define MAX_NEED_GC 64
-#define MAX_SAVE_PRIO 72
#define MAX_GC_TIMES 100
#define MIN_GC_NODES 100
#define GC_SLEEP_MS 100
@@ -559,8 +558,6 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
}
}
-#define cmp_int(l, r) ((l > r) - (l < r))
-
#ifdef CONFIG_PROVE_LOCKING
static int btree_lock_cmp_fn(const struct lockdep_map *_a,
const struct lockdep_map *_b)
@@ -1309,7 +1306,7 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
uint8_t stale = 0;
unsigned int keys = 0, good_keys = 0;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bset_tree *t;
gc->nodes++;
@@ -1570,7 +1567,7 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
unsigned int ret = 0;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
@@ -1611,17 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
int ret = 0;
bool should_rewrite;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct gc_merge_info r[GC_MERGE_NODES];
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
- bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
+ bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
for (i = r; i < r + ARRAY_SIZE(r); i++)
i->b = ERR_PTR(-EINTR);
while (1) {
- k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true, b);
@@ -1740,18 +1738,20 @@ static void btree_gc_start(struct cache_set *c)
mutex_lock(&c->bucket_lock);
- c->gc_mark_valid = 0;
c->gc_done = ZERO_KEY;
ca = c->cache;
for_each_bucket(b, ca) {
b->last_gc = b->gen;
+ if (bch_can_invalidate_bucket(ca, b))
+ b->reclaimable_in_gc = 1;
if (!atomic_read(&b->pin)) {
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
}
}
+ c->gc_mark_valid = 0;
mutex_unlock(&c->bucket_lock);
}
@@ -1808,6 +1808,9 @@ static void bch_btree_gc_finish(struct cache_set *c)
for_each_bucket(b, ca) {
c->need_gc = max(c->need_gc, bucket_gc_gen(b));
+ if (b->reclaimable_in_gc)
+ b->reclaimable_in_gc = 0;
+
if (atomic_read(&b->pin))
continue;
@@ -1911,7 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
{
int ret = 0;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(b->c, b->level, k);
@@ -1919,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
bch_initial_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
- bch_btree_iter_init(&b->keys, &iter, NULL);
+ bch_btree_iter_stack_init(&b->keys, &iter, NULL);
do {
- k = bch_btree_iter_next_filter(&iter, &b->keys,
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad);
if (k) {
btree_node_prefetch(b, k);
@@ -1950,7 +1953,7 @@ static int bch_btree_check_thread(void *arg)
struct btree_check_info *info = arg;
struct btree_check_state *check_state = info->state;
struct cache_set *c = check_state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
@@ -1959,8 +1962,8 @@ static int bch_btree_check_thread(void *arg)
ret = 0;
/* root node keys are checked before thread created */
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -1978,7 +1981,7 @@ static int bch_btree_check_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -2051,7 +2054,7 @@ int bch_btree_check(struct cache_set *c)
int ret = 0;
int i;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct btree_check_state check_state;
/* check and mark root node keys */
@@ -2547,11 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
if (b->level) {
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad))) {
ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
@@ -2580,11 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
{
int ret = MAP_CONTINUE;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
: bcache_btree(map_keys_recurse, k,
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index ebd500bdf0b2..26a6a535ec32 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -82,7 +82,7 @@ static void moving_init(struct moving_io *io)
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
bio_get(bio);
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
bio->bi_private = &io->cl;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 83d112bd2b1c..af345dc6fde1 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -369,10 +369,24 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
struct io *i;
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
(bio_op(bio) == REQ_OP_DISCARD))
goto skip;
+ if (c->gc_stats.in_use > CUTOFF_CACHE_ADD) {
+ /*
+ * If cached buckets are all clean now, 'true' will be
+ * returned and all requests will bypass the cache device.
+ * Then c->sectors_to_gc has no chance to be negative, and
+ * gc thread won't wake up and caching won't work forever.
+ * Here call force_wake_up_gc() to avoid such aftermath.
+ */
+ if (BDEV_STATE(&dc->sb) == BDEV_STATE_CLEAN &&
+ c->gc_mark_valid)
+ force_wake_up_gc(c);
+
+ goto skip;
+ }
+
if (mode == CACHE_MODE_NONE ||
(mode == CACHE_MODE_WRITEAROUND &&
op_is_write(bio_op(bio))))
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 68b02216033d..0056106495a7 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -123,7 +123,7 @@ void bch_cache_accounting_destroy(struct cache_accounting *acc)
kobject_put(&acc->day.kobj);
atomic_set(&acc->closing, 1);
- if (del_timer_sync(&acc->timer))
+ if (timer_delete_sync(&acc->timer))
closure_return(&acc->cl);
}
@@ -149,7 +149,7 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at)
static void scale_accounting(struct timer_list *t)
{
- struct cache_accounting *acc = from_timer(acc, t, timer);
+ struct cache_accounting *acc = timer_container_of(acc, t, timer);
#define move_stat(name) do { \
unsigned int t = atomic_xchg(&acc->collector.name, 0); \
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 330bcd9ea4a9..2ea490b9d370 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -171,7 +171,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
struct page *page;
unsigned int i;
- page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+ page = read_cache_page_gfp(bdev->bd_mapping,
SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
if (IS_ERR(page))
return "IO error";
@@ -293,8 +293,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
bio->bi_iter.bi_sector = SB_SECTOR;
- __bio_add_page(bio, virt_to_page(out), SB_SIZE,
- offset_in_page(out));
+ bio_add_virt_nofail(bio, out, SB_SIZE);
out->offset = cpu_to_le64(sb->offset);
@@ -546,7 +545,8 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
- static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+ static const char zero_uuid[16] __nonstring =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
return uuid_find(c, zero_uuid);
}
@@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d)
bcache_device_detach(d);
if (disk) {
- ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(disk->first_minor));
+ ida_free(&bcache_device_idx,
+ first_minor_to_idx(disk->first_minor));
put_disk(disk);
}
@@ -897,7 +897,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
sector_t sectors, struct block_device *cached_bdev,
const struct block_device_operations *ops)
{
- struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
SIZE_MAX / sizeof(atomic_t));
struct queue_limits lim = {
@@ -909,6 +908,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
.io_min = block_size,
.logical_block_size = block_size,
.physical_block_size = block_size,
+ .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
};
uint64_t n;
int idx;
@@ -940,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
if (!d->full_dirty_stripes)
goto out_free_stripe_sectors_dirty;
- idx = ida_simple_get(&bcache_device_idx, 0,
- BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
+ idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1,
+ GFP_KERNEL);
if (idx < 0)
goto out_free_full_dirty_stripes;
@@ -974,19 +974,12 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->minors = BCACHE_MINORS;
d->disk->fops = ops;
d->disk->private_data = d;
-
- q = d->disk->queue;
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
-
- blk_queue_write_cache(q, true, true);
-
return 0;
out_bioset_exit:
bioset_exit(&d->bio_split);
out_ida_remove:
- ida_simple_remove(&bcache_device_idx, idx);
+ ida_free(&bcache_device_idx, idx);
out_free_full_dirty_stripes:
kvfree(d->full_dirty_stripes);
out_free_stripe_sectors_dirty:
@@ -1423,8 +1416,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
}
if (bdev_io_opt(dc->bdev))
- dc->partial_stripes_expensive =
- q->limits.raid_partial_stripes_expensive;
+ dc->partial_stripes_expensive = !!(q->limits.features &
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
ret = bcache_device_init(&dc->disk, block_size,
bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
@@ -1725,7 +1718,7 @@ static CLOSURE_CALLBACK(cache_set_flush)
if (!IS_ERR_OR_NULL(c->gc_thread))
kthread_stop(c->gc_thread);
- if (!IS_ERR(c->root))
+ if (!IS_ERR_OR_NULL(c->root))
list_add(&c->root->list, &c->btree_cache);
/*
@@ -1740,7 +1733,12 @@ static CLOSURE_CALLBACK(cache_set_flush)
mutex_unlock(&b->write_lock);
}
- if (ca->alloc_thread)
+ /*
+ * If the register_cache_set() call to bch_cache_set_alloc() failed,
+ * ca has not been assigned a value and return error.
+ * So we need check ca is not NULL during bch_cache_set_unregister().
+ */
+ if (ca && ca->alloc_thread)
kthread_stop(ca->alloc_thread);
if (c->journal.cur) {
@@ -1914,8 +1912,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
- sizeof(struct btree_iter_set);
+ iter_size = sizeof(struct btree_iter) +
+ ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+ sizeof(struct btree_iter_set);
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
if (!c->devices)
@@ -2240,15 +2239,47 @@ static int cache_alloc(struct cache *ca)
bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
/*
- * when ca->sb.njournal_buckets is not zero, journal exists,
- * and in bch_journal_replay(), tree node may split,
- * so bucket of RESERVE_BTREE type is needed,
- * the worst situation is all journal buckets are valid journal,
- * and all the keys need to replay,
- * so the number of RESERVE_BTREE type buckets should be as much
- * as journal buckets
+ * When the cache disk is first registered, ca->sb.njournal_buckets
+ * is zero, and it is assigned in run_cache_set().
+ *
+ * When ca->sb.njournal_buckets is not zero, journal exists,
+ * and in bch_journal_replay(), tree node may split.
+ * The worst situation is all journal buckets are valid journal,
+ * and all the keys need to replay, so the number of RESERVE_BTREE
+ * type buckets should be as much as journal buckets.
+ *
+ * If the number of RESERVE_BTREE type buckets is too few, the
+ * bch_allocator_thread() may hang up and unable to allocate
+ * bucket. The situation is roughly as follows:
+ *
+ * 1. In bch_data_insert_keys(), if the operation is not op->replace,
+ * it will call the bch_journal(), which increments the journal_ref
+ * counter. This counter is only decremented after bch_btree_insert
+ * completes.
+ *
+ * 2. When calling bch_btree_insert, if the btree needs to split,
+ * it will call btree_split() and btree_check_reserve() to check
+ * whether there are enough reserved buckets in the RESERVE_BTREE
+ * slot. If not enough, bcache_btree_root() will repeatedly retry.
+ *
+ * 3. Normally, the bch_allocator_thread is responsible for filling
+ * the reservation slots from the free_inc bucket list. When the
+ * free_inc bucket list is exhausted, the bch_allocator_thread
+ * will call invalidate_buckets() until free_inc is refilled.
+ * Then bch_allocator_thread calls bch_prio_write() once. and
+ * bch_prio_write() will call bch_journal_meta() and waits for
+ * the journal write to complete.
+ *
+ * 4. During journal_write, journal_write_unlocked() is be called.
+ * If journal full occurs, journal_reclaim() and btree_flush_write()
+ * will be called sequentially, then retry journal_write.
+ *
+ * 5. When 2 and 4 occur together, IO will hung up and cannot recover.
+ *
+ * Therefore, reserve more RESERVE_BTREE type buckets.
*/
- btree_buckets = ca->sb.njournal_buckets ?: 8;
+ btree_buckets = clamp_t(size_t, ca->sb.nbuckets >> 7,
+ 32, SB_JOURNAL_BUCKETS);
free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
if (!free) {
ret = -EPERM;
@@ -2554,10 +2585,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (IS_ERR(bdev_file))
goto out_free_sb;
- err = "failed to set blocksize";
- if (set_blocksize(file_bdev(bdev_file), 4096))
- goto out_blkdev_put;
-
err = read_super(sb, file_bdev(bdev_file), &sb_disk);
if (err)
goto out_blkdev_put;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 6956beb55326..826b14cae4e5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -660,7 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
goto lock_root;
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index ae380bc3992e..410d8cb49e50 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * random utiility code, for bcache but in theory not specific to bcache
+ * random utility code, for bcache but in theory not specific to bcache
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 8827a6f130ad..302e75f1fc4b 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -334,7 +334,7 @@ static void dirty_init(struct keybuf_key *w)
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
if (!io->dc->writeback_percent)
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
bio->bi_private = w;
@@ -908,15 +908,15 @@ static int bch_dirty_init_thread(void *arg)
struct dirty_init_thrd_info *info = arg;
struct bch_dirty_init_state *state = info->state;
struct cache_set *c = state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
prev_idx = 0;
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -930,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -979,7 +979,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
int i;
struct btree *b = NULL;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;