summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/alloc.c21
-rw-r--r--drivers/md/bcache/bcache.h1
-rw-r--r--drivers/md/bcache/bset.c44
-rw-r--r--drivers/md/bcache/bset.h28
-rw-r--r--drivers/md/bcache/btree.c47
-rw-r--r--drivers/md/bcache/request.c16
-rw-r--r--drivers/md/bcache/super.c21
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/bcache/writeback.c10
-rw-r--r--drivers/md/dm-bio-prison-v2.c3
-rw-r--r--drivers/md/dm-cache-target.c17
-rw-r--r--drivers/md/dm-clone-metadata.c5
-rw-r--r--drivers/md/dm-clone-target.c18
-rw-r--r--drivers/md/dm-core.h2
-rw-r--r--drivers/md/dm-crypt.c73
-rw-r--r--drivers/md/dm-delay.c60
-rw-r--r--drivers/md/dm-era-target.c3
-rw-r--r--drivers/md/dm-integrity.c1
-rw-r--r--drivers/md/dm-log-writes.c2
-rw-r--r--drivers/md/dm-mpath.c3
-rw-r--r--drivers/md/dm-snap.c2
-rw-r--r--drivers/md/dm-table.c33
-rw-r--r--drivers/md/dm-target.c1
-rw-r--r--drivers/md/dm-thin.c16
-rw-r--r--drivers/md/dm-vdo/Makefile2
-rw-r--r--drivers/md/dm-vdo/data-vio.c3
-rw-r--r--drivers/md/dm-vdo/dm-vdo-target.c4
-rw-r--r--drivers/md/dm-vdo/flush.c3
-rw-r--r--drivers/md/dm-vdo/indexer/io-factory.c2
-rw-r--r--drivers/md/dm-vdo/murmurhash3.c2
-rw-r--r--drivers/md/dm-zero.c1
-rw-r--r--drivers/md/dm-zone.c499
-rw-r--r--drivers/md/dm-zoned-target.c1
-rw-r--r--drivers/md/dm.c84
-rw-r--r--drivers/md/dm.h5
-rw-r--r--drivers/md/md-bitmap.c6
-rw-r--r--drivers/md/md.c7
-rw-r--r--drivers/md/md.h3
-rw-r--r--drivers/md/raid5.c15
39 files changed, 385 insertions, 681 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ce13c272c387..48ce750bf70a 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -129,12 +129,9 @@ static inline bool can_inc_bucket_gen(struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
- BUG_ON(!ca->set->gc_mark_valid);
-
- return (!GC_MARK(b) ||
- GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
- !atomic_read(&b->pin) &&
- can_inc_bucket_gen(b);
+ return (ca->set->gc_mark_valid || b->reclaimable_in_gc) &&
+ ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
+ !atomic_read(&b->pin) && can_inc_bucket_gen(b));
}
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -148,6 +145,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO;
atomic_inc(&b->pin);
+ b->reclaimable_in_gc = 0;
}
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -352,8 +350,7 @@ static int bch_allocator_thread(void *arg)
*/
retry_invalidate:
- allocator_wait(ca, ca->set->gc_mark_valid &&
- !ca->invalidate_needs_gc);
+ allocator_wait(ca, !ca->invalidate_needs_gc);
invalidate_buckets(ca);
/*
@@ -501,8 +498,8 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
ca = c->cache;
b = bch_bucket_alloc(ca, reserve, wait);
- if (b == -1)
- goto err;
+ if (b < 0)
+ return -1;
k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
bucket_to_sector(c, b),
@@ -511,10 +508,6 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
SET_KEY_PTRS(k, 1);
return 0;
-err:
- bch_bucket_free(c, k);
- bkey_put(c, k);
- return -1;
}
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 4e6afa89921f..1d33e40d26ea 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -200,6 +200,7 @@ struct bucket {
uint8_t gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
+ uint16_t reclaimable_in_gc:1;
};
/*
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 2bba4d6aaaa2..463eb13bd0b2 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -54,7 +54,7 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
unsigned int ret = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k;
if (b->ops->is_extents)
@@ -67,7 +67,7 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
const char *err;
for_each_key(b, k, &iter) {
@@ -879,7 +879,7 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey preceding_key_on_stack = ZERO_KEY;
struct bkey *preceding_key_p = &preceding_key_on_stack;
@@ -895,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
else
preceding_key(k, &preceding_key_p);
- m = bch_btree_iter_init(b, &iter, preceding_key_p);
+ m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
- if (b->ops->insert_fixup(b, k, &iter, replace_key))
+ if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
return status;
status = BTREE_INSERT_STATUS_INSERT;
@@ -1100,33 +1100,33 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
btree_iter_cmp));
}
-static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search,
- struct bset_tree *start)
+static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search,
+ struct bset_tree *start)
{
struct bkey *ret = NULL;
- iter->size = ARRAY_SIZE(iter->data);
- iter->used = 0;
+ iter->iter.size = ARRAY_SIZE(iter->stack_data);
+ iter->iter.used = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter->b = b;
+ iter->iter.b = b;
#endif
for (; start <= bset_tree_last(b); start++) {
ret = bch_bset_search(b, start, search);
- bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
+ bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
}
return ret;
}
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
struct bkey *search)
{
- return __bch_btree_iter_init(b, iter, search, b->set);
+ return __bch_btree_iter_stack_init(b, iter, search, b->set);
}
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
@@ -1293,10 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
int oldsize = bch_count_data(b);
- __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
+ __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
if (start) {
unsigned int i;
@@ -1307,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
order = get_order(__set_bytes(b->set->data, keys));
}
- __btree_sort(b, &iter, start, order, false, state);
+ __btree_sort(b, &iter.iter, start, order, false, state);
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
@@ -1323,11 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(b, &iter, NULL);
+ bch_btree_iter_stack_init(b, &iter, NULL);
- btree_mergesort(b, new->set->data, &iter, false, true);
+ btree_mergesort(b, new->set->data, &iter.iter, false, true);
bch_time_stats_update(&state->time, start_time);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index d795c84246b0..011f6062c4c0 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -321,7 +321,14 @@ struct btree_iter {
#endif
struct btree_iter_set {
struct bkey *k, *end;
- } data[MAX_BSETS];
+ } data[];
+};
+
+/* Fixed-size btree_iter that can be allocated on the stack */
+
+struct btree_iter_stack {
+ struct btree_iter iter;
+ struct btree_iter_set stack_data[MAX_BSETS];
};
typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
@@ -333,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end);
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search);
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search);
struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
const struct bkey *search);
@@ -350,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
-#define for_each_key_filter(b, k, iter, filter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
+#define for_each_key_filter(b, k, stack_iter, filter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
+ filter));)
-#define for_each_key(b, k, iter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next(iter));)
+#define for_each_key(b, k, stack_iter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
/* Sorting */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 196cdacce38f..4e6ccf2c8a0b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1309,7 +1309,7 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
uint8_t stale = 0;
unsigned int keys = 0, good_keys = 0;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bset_tree *t;
gc->nodes++;
@@ -1570,7 +1570,7 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
unsigned int ret = 0;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
@@ -1611,17 +1611,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
int ret = 0;
bool should_rewrite;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct gc_merge_info r[GC_MERGE_NODES];
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
- bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
+ bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
for (i = r; i < r + ARRAY_SIZE(r); i++)
i->b = ERR_PTR(-EINTR);
while (1) {
- k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true, b);
@@ -1740,18 +1741,20 @@ static void btree_gc_start(struct cache_set *c)
mutex_lock(&c->bucket_lock);
- c->gc_mark_valid = 0;
c->gc_done = ZERO_KEY;
ca = c->cache;
for_each_bucket(b, ca) {
b->last_gc = b->gen;
+ if (bch_can_invalidate_bucket(ca, b))
+ b->reclaimable_in_gc = 1;
if (!atomic_read(&b->pin)) {
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
}
}
+ c->gc_mark_valid = 0;
mutex_unlock(&c->bucket_lock);
}
@@ -1808,6 +1811,9 @@ static void bch_btree_gc_finish(struct cache_set *c)
for_each_bucket(b, ca) {
c->need_gc = max(c->need_gc, bucket_gc_gen(b));
+ if (b->reclaimable_in_gc)
+ b->reclaimable_in_gc = 0;
+
if (atomic_read(&b->pin))
continue;
@@ -1911,7 +1917,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
{
int ret = 0;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(b->c, b->level, k);
@@ -1919,10 +1925,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
bch_initial_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
- bch_btree_iter_init(&b->keys, &iter, NULL);
+ bch_btree_iter_stack_init(&b->keys, &iter, NULL);
do {
- k = bch_btree_iter_next_filter(&iter, &b->keys,
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad);
if (k) {
btree_node_prefetch(b, k);
@@ -1950,7 +1956,7 @@ static int bch_btree_check_thread(void *arg)
struct btree_check_info *info = arg;
struct btree_check_state *check_state = info->state;
struct cache_set *c = check_state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
@@ -1959,8 +1965,8 @@ static int bch_btree_check_thread(void *arg)
ret = 0;
/* root node keys are checked before thread created */
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -1978,7 +1984,7 @@ static int bch_btree_check_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -2051,7 +2057,7 @@ int bch_btree_check(struct cache_set *c)
int ret = 0;
int i;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct btree_check_state check_state;
/* check and mark root node keys */
@@ -2547,11 +2553,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
if (b->level) {
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad))) {
ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
@@ -2580,11 +2586,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
{
int ret = MAP_CONTINUE;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
: bcache_btree(map_keys_recurse, k,
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 83d112bd2b1c..af345dc6fde1 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -369,10 +369,24 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
struct io *i;
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
(bio_op(bio) == REQ_OP_DISCARD))
goto skip;
+ if (c->gc_stats.in_use > CUTOFF_CACHE_ADD) {
+ /*
+ * If cached buckets are all clean now, 'true' will be
+ * returned and all requests will bypass the cache device.
+ * Then c->sectors_to_gc has no chance to be negative, and
+ * gc thread won't wake up and caching won't work forever.
+ * Here call force_wake_up_gc() to avoid such aftermath.
+ */
+ if (BDEV_STATE(&dc->sb) == BDEV_STATE_CLEAN &&
+ c->gc_mark_valid)
+ force_wake_up_gc(c);
+
+ goto skip;
+ }
+
if (mode == CACHE_MODE_NONE ||
(mode == CACHE_MODE_WRITEAROUND &&
op_is_write(bio_op(bio))))
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 330bcd9ea4a9..4d11fc664cb0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -171,7 +171,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
struct page *page;
unsigned int i;
- page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+ page = read_cache_page_gfp(bdev->bd_mapping,
SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
if (IS_ERR(page))
return "IO error";
@@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d)
bcache_device_detach(d);
if (disk) {
- ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(disk->first_minor));
+ ida_free(&bcache_device_idx,
+ first_minor_to_idx(disk->first_minor));
put_disk(disk);
}
@@ -940,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
if (!d->full_dirty_stripes)
goto out_free_stripe_sectors_dirty;
- idx = ida_simple_get(&bcache_device_idx, 0,
- BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
+ idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1,
+ GFP_KERNEL);
if (idx < 0)
goto out_free_full_dirty_stripes;
@@ -986,7 +986,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
out_bioset_exit:
bioset_exit(&d->bio_split);
out_ida_remove:
- ida_simple_remove(&bcache_device_idx, idx);
+ ida_free(&bcache_device_idx, idx);
out_free_full_dirty_stripes:
kvfree(d->full_dirty_stripes);
out_free_stripe_sectors_dirty:
@@ -1914,8 +1914,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
- sizeof(struct btree_iter_set);
+ iter_size = sizeof(struct btree_iter) +
+ ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+ sizeof(struct btree_iter_set);
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
if (!c->devices)
@@ -2554,10 +2555,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (IS_ERR(bdev_file))
goto out_free_sb;
- err = "failed to set blocksize";
- if (set_blocksize(file_bdev(bdev_file), 4096))
- goto out_blkdev_put;
-
err = read_super(sb, file_bdev(bdev_file), &sb_disk);
if (err)
goto out_blkdev_put;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 6956beb55326..826b14cae4e5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -660,7 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
goto lock_root;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 8827a6f130ad..792e070ccf38 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -908,15 +908,15 @@ static int bch_dirty_init_thread(void *arg)
struct dirty_init_thrd_info *info = arg;
struct bch_dirty_init_state *state = info->state;
struct cache_set *c = state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
prev_idx = 0;
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -930,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -979,7 +979,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
int i;
struct btree *b = NULL;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index fd852981ef9c..cf433b0cf742 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -321,8 +321,7 @@ static bool __unlock(struct dm_bio_prison_v2 *prison,
{
BUG_ON(!cell->exclusive_lock);
- bio_list_merge(bios, &cell->bios);
- bio_list_init(&cell->bios);
+ bio_list_merge_init(bios, &cell->bios);
if (cell->shared_count) {
cell->exclusive_lock = false;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 911f73f7ebba..16884b585053 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -115,8 +115,7 @@ static void __commit(struct work_struct *_ws)
*/
spin_lock_irq(&b->lock);
list_splice_init(&b->work_items, &work_items);
- bio_list_merge(&bios, &b->bios);
- bio_list_init(&b->bios);
+ bio_list_merge_init(&bios, &b->bios);
b->commit_scheduled = false;
spin_unlock_irq(&b->lock);
@@ -565,8 +564,7 @@ static void defer_bio(struct cache *cache, struct bio *bio)
static void defer_bios(struct cache *cache, struct bio_list *bios)
{
spin_lock_irq(&cache->lock);
- bio_list_merge(&cache->deferred_bios, bios);
- bio_list_init(bios);
+ bio_list_merge_init(&cache->deferred_bios, bios);
spin_unlock_irq(&cache->lock);
wake_deferred_bio_worker(cache);
@@ -1816,8 +1814,7 @@ static void process_deferred_bios(struct work_struct *ws)
bio_list_init(&bios);
spin_lock_irq(&cache->lock);
- bio_list_merge(&bios, &cache->deferred_bios);
- bio_list_init(&cache->deferred_bios);
+ bio_list_merge_init(&bios, &cache->deferred_bios);
spin_unlock_irq(&cache->lock);
while ((bio = bio_list_pop(&bios))) {
@@ -1847,8 +1844,7 @@ static void requeue_deferred_bios(struct cache *cache)
struct bio_list bios;
bio_list_init(&bios);
- bio_list_merge(&bios, &cache->deferred_bios);
- bio_list_init(&cache->deferred_bios);
+ bio_list_merge_init(&bios, &cache->deferred_bios);
while ((bio = bio_list_pop(&bios))) {
bio->bi_status = BLK_STS_DM_REQUEUE;
@@ -3394,8 +3390,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
if (!cache->features.discard_passdown) {
/* No passdown is done so setting own virtual limits */
- limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
- cache->origin_sectors);
+ limits->max_hw_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+ cache->origin_sectors);
limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
return;
}
@@ -3404,7 +3400,6 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
* cache_iterate_devices() is stacking both origin and fast device limits
* but discards aren't passed to fast device, so inherit origin's limits.
*/
- limits->max_discard_sectors = origin_limits->max_discard_sectors;
limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
limits->discard_granularity = origin_limits->discard_granularity;
limits->discard_alignment = origin_limits->discard_alignment;
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
index c43d55672bce..47c1fa7aad8b 100644
--- a/drivers/md/dm-clone-metadata.c
+++ b/drivers/md/dm-clone-metadata.c
@@ -465,11 +465,6 @@ static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
/*---------------------------------------------------------------------------*/
-static size_t bitmap_size(unsigned long nr_bits)
-{
- return BITS_TO_LONGS(nr_bits) * sizeof(long);
-}
-
static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
unsigned long nr_regions)
{
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 94b2fc33f64b..ad79b52ffc14 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -1181,8 +1181,7 @@ static void process_deferred_discards(struct clone *clone)
struct bio_list discards = BIO_EMPTY_LIST;
spin_lock_irq(&clone->lock);
- bio_list_merge(&discards, &clone->deferred_discard_bios);
- bio_list_init(&clone->deferred_discard_bios);
+ bio_list_merge_init(&discards, &clone->deferred_discard_bios);
spin_unlock_irq(&clone->lock);
if (bio_list_empty(&discards))
@@ -1215,8 +1214,7 @@ static void process_deferred_bios(struct clone *clone)
struct bio_list bios = BIO_EMPTY_LIST;
spin_lock_irq(&clone->lock);
- bio_list_merge(&bios, &clone->deferred_bios);
- bio_list_init(&clone->deferred_bios);
+ bio_list_merge_init(&bios, &clone->deferred_bios);
spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios))
@@ -1237,11 +1235,9 @@ static void process_deferred_flush_bios(struct clone *clone)
* before issuing them or signaling their completion.
*/
spin_lock_irq(&clone->lock);
- bio_list_merge(&bios, &clone->deferred_flush_bios);
- bio_list_init(&clone->deferred_flush_bios);
-
- bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
- bio_list_init(&clone->deferred_flush_completions);
+ bio_list_merge_init(&bios, &clone->deferred_flush_bios);
+ bio_list_merge_init(&bio_completions,
+ &clone->deferred_flush_completions);
spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
@@ -2050,7 +2046,8 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
/* No passdown is done so we set our own virtual limits */
limits->discard_granularity = clone->region_size << SECTOR_SHIFT;
- limits->max_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, clone->region_size);
+ limits->max_hw_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT,
+ clone->region_size);
return;
}
@@ -2059,7 +2056,6 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
* device limits but discards aren't passed to the source device, so
* inherit destination's limits.
*/
- limits->max_discard_sectors = dest_limits->max_discard_sectors;
limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors;
limits->discard_granularity = dest_limits->discard_granularity;
limits->discard_alignment = dest_limits->discard_alignment;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index e6757a30dcca..08700bfc3e23 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -140,7 +140,7 @@ struct mapped_device {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_zones;
- unsigned int *zwp_offset;
+ void *zone_revalidate_map;
#endif
#ifdef CONFIG_IMA
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9a74c6316c5d..1b7a97cc3779 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -47,6 +47,8 @@
#define DM_MSG_PREFIX "crypt"
+static DEFINE_IDA(workqueue_ida);
+
/*
* context holding the current state of a multi-part conversion
*/
@@ -137,9 +139,9 @@ struct iv_elephant_private {
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
- DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
- DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
- DM_CRYPT_WRITE_INLINE };
+ DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY,
+ DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE,
+ DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE };
enum cipher_flags {
CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */
@@ -184,6 +186,7 @@ struct crypt_config {
struct crypto_aead **tfms_aead;
} cipher_tfm;
unsigned int tfms_count;
+ int workqueue_id;
unsigned long cipher_flags;
/*
@@ -1653,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
/*
* Generate a new unfragmented bio with the given size
- * This should never violate the device limitations (but only because
- * max_segment_size is being constrained to PAGE_SIZE).
+ * This should never violate the device limitations (but if it did then block
+ * core should split the bio as needed).
*
* This function may be called concurrently. If we allocate from the mempool
* concurrently, there is a possibility of deadlock. For example, if we have
@@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti)
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);
+ if (cc->workqueue_id)
+ ida_free(&workqueue_ida, cc->workqueue_id);
+
crypt_free_tfms(cc);
bioset_exit(&cc->bs);
@@ -3134,7 +3140,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
struct crypt_config *cc = ti->private;
struct dm_arg_set as;
static const struct dm_arg _args[] = {
- {0, 8, "Invalid number of feature args"},
+ {0, 9, "Invalid number of feature args"},
};
unsigned int opt_params, val;
const char *opt_string, *sval;
@@ -3161,6 +3167,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
else if (!strcasecmp(opt_string, "same_cpu_crypt"))
set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+ else if (!strcasecmp(opt_string, "high_priority"))
+ set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
@@ -3230,8 +3238,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
const char *devname = dm_table_device_name(ti->table);
- int key_size;
+ int key_size, wq_id;
unsigned int align_mask;
+ unsigned int common_wq_flags;
unsigned long long tmpll;
int ret;
size_t iv_size_padding, additional_req_size;
@@ -3398,20 +3407,38 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->tag_pool_max_sectors <<= cc->sector_shift;
}
+ wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL);
+ if (wq_id < 0) {
+ ti->error = "Couldn't get workqueue id";
+ ret = wq_id;
+ goto bad;
+ }
+ cc->workqueue_id = wq_id;
+
ret = -ENOMEM;
- cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
+ common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS;
+ if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
+ common_wq_flags |= WQ_HIGHPRI;
+
+ cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
- if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
- 1, devname);
- else
- cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
- num_online_cpus(), devname);
+ if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) {
+ cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
+ common_wq_flags | WQ_CPU_INTENSIVE,
+ 1, devname, wq_id);
+ } else {
+ /*
+ * While crypt_queue is certainly CPU intensive, the use of
+ * WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND.
+ */
+ cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
+ common_wq_flags | WQ_UNBOUND,
+ num_online_cpus(), devname, wq_id);
+ }
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
@@ -3427,6 +3454,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Couldn't spawn write thread";
goto bad;
}
+ if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
+ set_user_nice(cc->write_thread, MIN_NICE);
ti->num_flush_bios = 1;
ti->limit_swap_bios = true;
@@ -3547,6 +3576,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
num_feature_args += !!ti->num_discard_bios;
num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+ num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
@@ -3560,6 +3590,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" allow_discards");
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
DMEMIT(" same_cpu_crypt");
+ if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
+ DMEMIT(" high_priority");
if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
DMEMIT(" submit_from_crypt_cpus");
if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
@@ -3579,6 +3611,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT_TARGET_NAME_VERSION(ti->type);
DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
+ DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n');
DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
'y' : 'n');
DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
@@ -3688,14 +3721,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct crypt_config *cc = ti->private;
- /*
- * Unfortunate constraint that is required to avoid the potential
- * for exceeding underlying device's max_segments limits -- due to
- * crypt_alloc_buffer() possibly allocating pages for the encryption
- * bio that are not as physically contiguous as the original bio.
- */
- limits->max_segment_size = PAGE_SIZE;
-
limits->logical_block_size =
max_t(unsigned int, limits->logical_block_size, cc->sector_size);
limits->physical_block_size =
@@ -3706,7 +3731,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 25, 0},
+ .version = {1, 26, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 5eabdb06c649..08f6387620c1 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -28,7 +28,8 @@ struct delay_class {
struct delay_c {
struct timer_list delay_timer;
- struct mutex timer_lock;
+ struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
+ spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
@@ -49,8 +50,6 @@ struct dm_delay_info {
unsigned long expires;
};
-static DEFINE_MUTEX(delayed_bios_lock);
-
static void handle_delayed_timer(struct timer_list *t)
{
struct delay_c *dc = from_timer(dc, t, delay_timer);
@@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t)
static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
- mutex_lock(&dc->timer_lock);
-
- if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
- mod_timer(&dc->delay_timer, expires);
-
- mutex_unlock(&dc->timer_lock);
+ timer_reduce(&dc->delay_timer, expires);
}
static inline bool delay_is_fast(struct delay_c *dc)
@@ -89,12 +83,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
struct bio_list flush_bio_list;
+ LIST_HEAD(local_list);
unsigned long next_expires = 0;
bool start_timer = false;
bio_list_init(&flush_bio_list);
- mutex_lock(&delayed_bios_lock);
- list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ mutex_lock(&dc->process_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
+ list_replace_init(&dc->delayed_bios, &local_list);
+ spin_unlock(&dc->delayed_bios_lock);
+ list_for_each_entry_safe(delayed, next, &local_list, list) {
cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
@@ -114,7 +112,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
}
}
}
- mutex_unlock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
+ list_splice(&local_list, &dc->delayed_bios);
+ spin_unlock(&dc->delayed_bios_lock);
+ mutex_unlock(&dc->process_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
@@ -128,13 +129,13 @@ static int flush_worker_fn(void *data)
while (!kthread_should_stop()) {
flush_delayed_bios(dc, false);
- mutex_lock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
schedule();
} else {
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
cond_resched();
}
}
@@ -154,8 +155,10 @@ static void delay_dtr(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- if (dc->kdelayd_wq)
+ if (dc->kdelayd_wq) {
+ timer_shutdown_sync(&dc->delay_timer);
destroy_workqueue(dc->kdelayd_wq);
+ }
if (dc->read.dev)
dm_put_device(ti, dc->read.dev);
@@ -166,7 +169,7 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
- mutex_destroy(&dc->timer_lock);
+ mutex_destroy(&dc->process_bios_lock);
kfree(dc);
}
@@ -224,7 +227,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
- mutex_init(&dc->timer_lock);
+ mutex_init(&dc->process_bios_lock);
+ spin_lock_init(&dc->delayed_bios_lock);
dc->may_delay = true;
dc->argc = argc;
@@ -240,19 +244,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv);
if (ret)
goto bad;
- max_delay = max(max_delay, dc->write.delay);
- max_delay = max(max_delay, dc->flush.delay);
goto out;
}
ret = delay_class_ctr(ti, &dc->write, argv + 3);
if (ret)
goto bad;
+ max_delay = max(max_delay, dc->write.delay);
+
if (argc == 6) {
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
if (ret)
goto bad;
- max_delay = max(max_delay, dc->flush.delay);
goto out;
}
@@ -267,8 +270,7 @@ out:
* In case of small requested delays, use kthread instead of
* timers and workqueue to achieve better latency.
*/
- dc->worker = kthread_create(&flush_worker_fn, dc,
- "dm-delay-flush-worker");
+ dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
dc->worker = NULL;
@@ -309,14 +311,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->context = dc;
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
- mutex_lock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
if (unlikely(!dc->may_delay)) {
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
return DM_MAPIO_REMAPPED;
}
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
if (delay_is_fast(dc))
wake_up_process(dc->worker);
@@ -330,12 +332,12 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- mutex_lock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
dc->may_delay = false;
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
if (!delay_is_fast(dc))
- del_timer_sync(&dc->delay_timer);
+ timer_delete(&dc->delay_timer);
flush_delayed_bios(dc, true);
}
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 6acfa5bf97a4..8f81e597858d 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1272,8 +1272,7 @@ static void process_deferred_bios(struct era *era)
bio_list_init(&marked_bios);
spin_lock(&era->deferred_lock);
- bio_list_merge(&deferred_bios, &era->deferred_bios);
- bio_list_init(&era->deferred_bios);
+ bio_list_merge_init(&deferred_bios, &era->deferred_bios);
spin_unlock(&era->deferred_lock);
if (bio_list_empty(&deferred_bios))
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 7f3dc8ee6ab8..417fddebe367 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3492,6 +3492,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
limits->dma_alignment = limits->logical_block_size - 1;
+ limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
}
limits->max_integrity_segments = USHRT_MAX;
}
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index f17a6cf2284e..8d7df8303d0a 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -871,7 +871,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
if (!bdev_max_discard_sectors(lc->dev->bdev)) {
lc->device_supports_discard = false;
limits->discard_granularity = lc->sectorsize;
- limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
+ limits->max_hw_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
}
limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev);
limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 05d1328d1811..15b681b90153 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -704,8 +704,7 @@ static void process_queued_bios(struct work_struct *work)
return;
}
- bio_list_merge(&bios, &m->queued_bios);
- bio_list_init(&m->queued_bios);
+ bio_list_merge_init(&bios, &m->queued_bios);
spin_unlock_irqrestore(&m->lock, flags);
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 0ace06d1bee3..f40c18da4000 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2410,7 +2410,7 @@ static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
/* All discards are split on chunk_size boundary */
limits->discard_granularity = snap->store->chunk_size;
- limits->max_discard_sectors = snap->store->chunk_size;
+ limits->max_hw_discard_sectors = snap->store->chunk_size;
up_read(&_origins_lock);
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 41f1d731ae5a..b2d5246cff21 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1963,26 +1963,23 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
bool wc = false, fua = false;
int r;
- /*
- * Copy table's limits to the DM device's request_queue
- */
- q->limits = *limits;
-
if (dm_table_supports_nowait(t))
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
else
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
if (!dm_table_supports_discards(t)) {
- q->limits.max_discard_sectors = 0;
- q->limits.max_hw_discard_sectors = 0;
- q->limits.discard_granularity = 0;
- q->limits.discard_alignment = 0;
- q->limits.discard_misaligned = 0;
+ limits->max_hw_discard_sectors = 0;
+ limits->discard_granularity = 0;
+ limits->discard_alignment = 0;
+ limits->discard_misaligned = 0;
}
+ if (!dm_table_supports_write_zeroes(t))
+ limits->max_write_zeroes_sectors = 0;
+
if (!dm_table_supports_secure_erase(t))
- q->limits.max_secure_erase_sectors = 0;
+ limits->max_secure_erase_sectors = 0;
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
@@ -2007,9 +2004,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- if (!dm_table_supports_write_zeroes(t))
- q->limits.max_write_zeroes_sectors = 0;
-
dm_table_verify_integrity(t);
/*
@@ -2038,16 +2032,17 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
* For a zoned target, setup the zones related queue attributes
* and resources necessary for zone append emulation if necessary.
*/
- if (blk_queue_is_zoned(q)) {
- r = dm_set_zones_restrictions(t, q);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
+ r = dm_set_zones_restrictions(t, q, limits);
if (r)
return r;
- if (!static_key_enabled(&zoned_enabled.key))
- static_branch_enable(&zoned_enabled);
}
+ r = queue_limits_set(q, limits);
+ if (r)
+ return r;
+
dm_update_crypto_profile(q, t);
- disk_update_readahead(t->md->disk);
/*
* Check for request-based device is left to
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 0c4efb0bef8a..652627aea11b 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -249,7 +249,6 @@ static int io_err_iterate_devices(struct dm_target *ti,
static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
- limits->max_discard_sectors = UINT_MAX;
limits->max_hw_discard_sectors = UINT_MAX;
limits->discard_granularity = 512;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4793ad2aa1f7..991f77a5885b 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -592,12 +592,6 @@ struct dm_thin_endio_hook {
struct dm_bio_prison_cell *cell;
};
-static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
-{
- bio_list_merge(bios, master);
- bio_list_init(master);
-}
-
static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
struct bio *bio;
@@ -616,7 +610,7 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
bio_list_init(&bios);
spin_lock_irq(&tc->lock);
- __merge_bio_list(&bios, master);
+ bio_list_merge_init(&bios, master);
spin_unlock_irq(&tc->lock);
error_bio_list(&bios, error);
@@ -645,8 +639,8 @@ static void requeue_io(struct thin_c *tc)
bio_list_init(&bios);
spin_lock_irq(&tc->lock);
- __merge_bio_list(&bios, &tc->deferred_bio_list);
- __merge_bio_list(&bios, &tc->retry_on_resume_list);
+ bio_list_merge_init(&bios, &tc->deferred_bio_list);
+ bio_list_merge_init(&bios, &tc->retry_on_resume_list);
spin_unlock_irq(&tc->lock);
error_bio_list(&bios, BLK_STS_DM_REQUEUE);
@@ -4100,7 +4094,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
if (pt->adjusted_pf.discard_enabled) {
disable_discard_passdown_if_not_supported(pt);
if (!pt->adjusted_pf.discard_passdown)
- limits->max_discard_sectors = 0;
+ limits->max_hw_discard_sectors = 0;
/*
* The pool uses the same discard limits as the underlying data
* device. DM core has already set this up.
@@ -4497,7 +4491,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
if (pool->pf.discard_enabled) {
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
- limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE;
+ limits->max_hw_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE;
}
}
diff --git a/drivers/md/dm-vdo/Makefile b/drivers/md/dm-vdo/Makefile
index 33e09abc6acd..9476957bfbf4 100644
--- a/drivers/md/dm-vdo/Makefile
+++ b/drivers/md/dm-vdo/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I$(srctree)/$(src) -I$(srctree)/$(src)/indexer
+ccflags-y := -I$(src) -I$(src)/indexer
obj-$(CONFIG_DM_VDO) += dm-vdo.o
diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c
index 94f6f1ccfb7d..ab3ea8337809 100644
--- a/drivers/md/dm-vdo/data-vio.c
+++ b/drivers/md/dm-vdo/data-vio.c
@@ -604,8 +604,7 @@ static void assign_discard_permit(struct limiter *limiter)
static void get_waiters(struct limiter *limiter)
{
- bio_list_merge(&limiter->waiters, &limiter->new_waiters);
- bio_list_init(&limiter->new_waiters);
+ bio_list_merge_init(&limiter->waiters, &limiter->new_waiters);
}
static inline struct data_vio *get_available_data_vio(struct data_vio_pool *pool)
diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c
index 5a4b0a927f56..b423bec6458b 100644
--- a/drivers/md/dm-vdo/dm-vdo-target.c
+++ b/drivers/md/dm-vdo/dm-vdo-target.c
@@ -878,7 +878,7 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti,
}
if (config->version == 0) {
- u64 device_size = i_size_read(config->owned_device->bdev->bd_inode);
+ u64 device_size = bdev_nr_bytes(config->owned_device->bdev);
config->physical_blocks = device_size / VDO_BLOCK_SIZE;
}
@@ -1011,7 +1011,7 @@ static void vdo_status(struct dm_target *ti, status_type_t status_type,
static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo)
{
- return i_size_read(vdo_get_backing_device(vdo)->bd_inode) / VDO_BLOCK_SIZE;
+ return bdev_nr_bytes(vdo_get_backing_device(vdo)) / VDO_BLOCK_SIZE;
}
static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc,
diff --git a/drivers/md/dm-vdo/flush.c b/drivers/md/dm-vdo/flush.c
index 57e87f0d7069..dd4fdee2ca0c 100644
--- a/drivers/md/dm-vdo/flush.c
+++ b/drivers/md/dm-vdo/flush.c
@@ -369,8 +369,7 @@ void vdo_dump_flusher(const struct flusher *flusher)
static void initialize_flush(struct vdo_flush *flush, struct vdo *vdo)
{
bio_list_init(&flush->bios);
- bio_list_merge(&flush->bios, &vdo->flusher->waiting_flush_bios);
- bio_list_init(&vdo->flusher->waiting_flush_bios);
+ bio_list_merge_init(&flush->bios, &vdo->flusher->waiting_flush_bios);
}
static void launch_flush(struct vdo_flush *flush)
diff --git a/drivers/md/dm-vdo/indexer/io-factory.c b/drivers/md/dm-vdo/indexer/io-factory.c
index 515765d35794..1bee9d63dc0a 100644
--- a/drivers/md/dm-vdo/indexer/io-factory.c
+++ b/drivers/md/dm-vdo/indexer/io-factory.c
@@ -90,7 +90,7 @@ void uds_put_io_factory(struct io_factory *factory)
size_t uds_get_writable_size(struct io_factory *factory)
{
- return i_size_read(factory->bdev->bd_inode);
+ return bdev_nr_bytes(factory->bdev);
}
/* Create a struct dm_bufio_client for an index region starting at offset. */
diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c
index 01d2743444ec..3a989efae142 100644
--- a/drivers/md/dm-vdo/murmurhash3.c
+++ b/drivers/md/dm-vdo/murmurhash3.c
@@ -137,7 +137,7 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
break;
default:
break;
- };
+ }
}
/* finalization */
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index 3b13e6eb1aa4..9a0bb623e823 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -61,7 +61,6 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
static void zero_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
- limits->max_discard_sectors = UINT_MAX;
limits->max_hw_discard_sectors = UINT_MAX;
limits->discard_granularity = 512;
}
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index eb9832b22b14..5d66d916730e 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -60,16 +60,23 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
struct dm_table *map;
int srcu_idx, ret;
- if (dm_suspended_md(md))
- return -EAGAIN;
+ if (!md->zone_revalidate_map) {
+ /* Regular user context */
+ if (dm_suspended_md(md))
+ return -EAGAIN;
- map = dm_get_live_table(md, &srcu_idx);
- if (!map)
- return -EIO;
+ map = dm_get_live_table(md, &srcu_idx);
+ if (!map)
+ return -EIO;
+ } else {
+ /* Zone revalidation during __bind() */
+ map = md->zone_revalidate_map;
+ }
ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);
- dm_put_live_table(md, srcu_idx);
+ if (!md->zone_revalidate_map)
+ dm_put_live_table(md, srcu_idx);
return ret;
}
@@ -138,81 +145,17 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
}
}
-void dm_cleanup_zoned_dev(struct mapped_device *md)
-{
- if (md->disk) {
- bitmap_free(md->disk->conv_zones_bitmap);
- md->disk->conv_zones_bitmap = NULL;
- bitmap_free(md->disk->seq_zones_wlock);
- md->disk->seq_zones_wlock = NULL;
- }
-
- kvfree(md->zwp_offset);
- md->zwp_offset = NULL;
- md->nr_zones = 0;
-}
-
-static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
-{
- switch (zone->cond) {
- case BLK_ZONE_COND_IMP_OPEN:
- case BLK_ZONE_COND_EXP_OPEN:
- case BLK_ZONE_COND_CLOSED:
- return zone->wp - zone->start;
- case BLK_ZONE_COND_FULL:
- return zone->len;
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_NOT_WP:
- case BLK_ZONE_COND_OFFLINE:
- case BLK_ZONE_COND_READONLY:
- default:
- /*
- * Conventional, offline and read-only zones do not have a valid
- * write pointer. Use 0 as for an empty zone.
- */
- return 0;
- }
-}
-
-static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
+/*
+ * Count conventional zones of a mapped zoned device. If the device
+ * only has conventional zones, do not expose it as zoned.
+ */
+static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
{
- struct mapped_device *md = data;
- struct gendisk *disk = md->disk;
+ unsigned int *nr_conv_zones = data;
- switch (zone->type) {
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!disk->conv_zones_bitmap) {
- disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones,
- GFP_NOIO);
- if (!disk->conv_zones_bitmap)
- return -ENOMEM;
- }
- set_bit(idx, disk->conv_zones_bitmap);
- break;
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
- if (!disk->seq_zones_wlock) {
- disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones,
- GFP_NOIO);
- if (!disk->seq_zones_wlock)
- return -ENOMEM;
- }
- if (!md->zwp_offset) {
- md->zwp_offset =
- kvcalloc(disk->nr_zones, sizeof(unsigned int),
- GFP_KERNEL);
- if (!md->zwp_offset)
- return -ENOMEM;
- }
- md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);
-
- break;
- default:
- DMERR("Invalid zone type 0x%x at sectors %llu",
- (int)zone->type, zone->start);
- return -ENODEV;
- }
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ (*nr_conv_zones)++;
return 0;
}
@@ -226,41 +169,32 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
struct gendisk *disk = md->disk;
- unsigned int noio_flag;
int ret;
- /*
- * Check if something changed. If yes, cleanup the current resources
- * and reallocate everything.
- */
+ /* Revalidate only if something changed. */
if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
- dm_cleanup_zoned_dev(md);
+ md->nr_zones = 0;
+
if (md->nr_zones)
return 0;
/*
- * Scan all zones to initialize everything. Ensure that all vmalloc
- * operations in this context are done as if GFP_NOIO was specified.
+ * Our table is not live yet. So the call to dm_get_live_table()
+ * in dm_blk_report_zones() will fail. Set a temporary pointer to
+ * our table for dm_blk_report_zones() to use directly.
*/
- noio_flag = memalloc_noio_save();
- ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
- dm_zone_revalidate_cb, md);
- memalloc_noio_restore(noio_flag);
- if (ret < 0)
- goto err;
- if (ret != disk->nr_zones) {
- ret = -EIO;
- goto err;
+ md->zone_revalidate_map = t;
+ ret = blk_revalidate_disk_zones(disk);
+ md->zone_revalidate_map = NULL;
+
+ if (ret) {
+ DMERR("Revalidate zones failed %d", ret);
+ return ret;
}
md->nr_zones = disk->nr_zones;
return 0;
-
-err:
- DMERR("Revalidate zones failed %d", ret);
- dm_cleanup_zoned_dev(md);
- return ret;
}
static int device_not_zone_append_capable(struct dm_target *ti,
@@ -286,297 +220,68 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
return true;
}
-int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
+ struct queue_limits *lim)
{
struct mapped_device *md = t->md;
+ struct gendisk *disk = md->disk;
+ unsigned int nr_conv_zones = 0;
+ int ret;
/*
- * For a zoned target, the number of zones should be updated for the
- * correct value to be exposed in sysfs queue/nr_zones.
+ * Check if zone append is natively supported, and if not, set the
+ * mapped device queue as needing zone append emulation.
*/
WARN_ON_ONCE(queue_is_mq(q));
- md->disk->nr_zones = bdev_nr_zones(md->disk->part0);
-
- /* Check if zone append is natively supported */
if (dm_table_supports_zone_append(t)) {
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- dm_cleanup_zoned_dev(md);
- return 0;
+ } else {
+ set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ lim->max_zone_append_sectors = 0;
}
- /*
- * Mark the mapped device as needing zone append emulation and
- * initialize the emulation resources once the capacity is set.
- */
- set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
if (!get_capacity(md->disk))
return 0;
- return dm_revalidate_zones(md, t);
-}
-
-static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- unsigned int *wp_offset = data;
-
- *wp_offset = dm_get_zone_wp_offset(zone);
-
- return 0;
-}
-
-static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
- unsigned int *wp_ofst)
-{
- sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
- unsigned int noio_flag;
- struct dm_table *t;
- int srcu_idx, ret;
-
- t = dm_get_live_table(md, &srcu_idx);
- if (!t)
- return -EIO;
-
- /*
- * Ensure that all memory allocations in this context are done as if
- * GFP_NOIO was specified.
- */
- noio_flag = memalloc_noio_save();
- ret = dm_blk_do_report_zones(md, t, sector, 1,
- dm_update_zone_wp_offset_cb, wp_ofst);
- memalloc_noio_restore(noio_flag);
-
- dm_put_live_table(md, srcu_idx);
-
- if (ret != 1)
- return -EIO;
-
- return 0;
-}
-
-struct orig_bio_details {
- enum req_op op;
- unsigned int nr_sectors;
-};
-
-/*
- * First phase of BIO mapping for targets with zone append emulation:
- * check all BIO that change a zone writer pointer and change zone
- * append operations into regular write operations.
- */
-static bool dm_zone_map_bio_begin(struct mapped_device *md,
- unsigned int zno, struct bio *clone)
-{
- sector_t zsectors = bdev_zone_sectors(md->disk->part0);
- unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
-
- /*
- * If the target zone is in an error state, recover by inspecting the
- * zone to get its current write pointer position. Note that since the
- * target zone is already locked, a BIO issuing context should never
- * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
- */
- if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
- if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
- return false;
- WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
- }
-
- switch (bio_op(clone)) {
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_FINISH:
- return true;
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- /* Writes must be aligned to the zone write pointer */
- if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
- return false;
- break;
- case REQ_OP_ZONE_APPEND:
- /*
- * Change zone append operations into a non-mergeable regular
- * writes directed at the current write pointer position of the
- * target zone.
- */
- clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
- (clone->bi_opf & (~REQ_OP_MASK));
- clone->bi_iter.bi_sector += zwp_offset;
- break;
- default:
- DMWARN_LIMIT("Invalid BIO operation");
- return false;
- }
-
- /* Cannot write to a full zone */
- if (zwp_offset >= zsectors)
- return false;
-
- return true;
-}
-
-/*
- * Second phase of BIO mapping for targets with zone append emulation:
- * update the zone write pointer offset array to account for the additional
- * data written to a zone. Note that at this point, the remapped clone BIO
- * may already have completed, so we do not touch it.
- */
-static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
- struct orig_bio_details *orig_bio_details,
- unsigned int nr_sectors)
-{
- unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
-
- /* The clone BIO may already have been completed and failed */
- if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
- return BLK_STS_IOERR;
-
- /* Update the zone wp offset */
- switch (orig_bio_details->op) {
- case REQ_OP_ZONE_RESET:
- WRITE_ONCE(md->zwp_offset[zno], 0);
- return BLK_STS_OK;
- case REQ_OP_ZONE_FINISH:
- WRITE_ONCE(md->zwp_offset[zno],
- bdev_zone_sectors(md->disk->part0));
- return BLK_STS_OK;
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
- return BLK_STS_OK;
- case REQ_OP_ZONE_APPEND:
- /*
- * Check that the target did not truncate the write operation
- * emulating a zone append.
- */
- if (nr_sectors != orig_bio_details->nr_sectors) {
- DMWARN_LIMIT("Truncated write for zone append");
- return BLK_STS_IOERR;
- }
- WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
- return BLK_STS_OK;
- default:
- DMWARN_LIMIT("Invalid BIO operation");
- return BLK_STS_IOERR;
- }
-}
-
-static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
- struct bio *clone)
-{
- if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
- return;
-
- wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
- bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
-}
-
-static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
- struct bio *clone)
-{
- if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
- return;
-
- WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
- clear_bit_unlock(zno, disk->seq_zones_wlock);
- smp_mb__after_atomic();
- wake_up_bit(disk->seq_zones_wlock, zno);
-
- bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
-}
-
-static bool dm_need_zone_wp_tracking(struct bio *bio)
-{
/*
- * Special processing is not needed for operations that do not need the
- * zone write lock, that is, all operations that target conventional
- * zones and all operations that do not modify directly a sequential
- * zone write pointer.
+ * Count conventional zones to check that the mapped device will indeed
+ * have sequential write required zones.
*/
- if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
- return false;
- switch (bio_op(bio)) {
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE:
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_FINISH:
- case REQ_OP_ZONE_APPEND:
- return bio_zone_is_seq(bio);
- default:
- return false;
+ md->zone_revalidate_map = t;
+ ret = dm_blk_report_zones(disk, 0, UINT_MAX,
+ dm_check_zoned_cb, &nr_conv_zones);
+ md->zone_revalidate_map = NULL;
+ if (ret < 0) {
+ DMERR("Check zoned failed %d", ret);
+ return ret;
}
-}
-
-/*
- * Special IO mapping for targets needing zone append emulation.
- */
-int dm_zone_map_bio(struct dm_target_io *tio)
-{
- struct dm_io *io = tio->io;
- struct dm_target *ti = tio->ti;
- struct mapped_device *md = io->md;
- struct bio *clone = &tio->clone;
- struct orig_bio_details orig_bio_details;
- unsigned int zno;
- blk_status_t sts;
- int r;
/*
- * IOs that do not change a zone write pointer do not need
- * any additional special processing.
+ * If we only have conventional zones, expose the mapped device as
+ * a regular device.
*/
- if (!dm_need_zone_wp_tracking(clone))
- return ti->type->map(ti, clone);
-
- /* Lock the target zone */
- zno = bio_zone_no(clone);
- dm_zone_lock(md->disk, zno, clone);
-
- orig_bio_details.nr_sectors = bio_sectors(clone);
- orig_bio_details.op = bio_op(clone);
-
- /*
- * Check that the bio and the target zone write pointer offset are
- * both valid, and if the bio is a zone append, remap it to a write.
- */
- if (!dm_zone_map_bio_begin(md, zno, clone)) {
- dm_zone_unlock(md->disk, zno, clone);
- return DM_MAPIO_KILL;
+ if (nr_conv_zones >= ret) {
+ lim->max_open_zones = 0;
+ lim->max_active_zones = 0;
+ lim->zoned = false;
+ clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ disk->nr_zones = 0;
+ return 0;
}
- /* Let the target do its work */
- r = ti->type->map(ti, clone);
- switch (r) {
- case DM_MAPIO_SUBMITTED:
- /*
- * The target submitted the clone BIO. The target zone will
- * be unlocked on completion of the clone.
- */
- sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
- *tio->len_ptr);
- break;
- case DM_MAPIO_REMAPPED:
- /*
- * The target only remapped the clone BIO. In case of error,
- * unlock the target zone here as the clone will not be
- * submitted.
- */
- sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
- *tio->len_ptr);
- if (sts != BLK_STS_OK)
- dm_zone_unlock(md->disk, zno, clone);
- break;
- case DM_MAPIO_REQUEUE:
- case DM_MAPIO_KILL:
- default:
- dm_zone_unlock(md->disk, zno, clone);
- sts = BLK_STS_IOERR;
- break;
+ if (!md->disk->nr_zones) {
+ DMINFO("%s using %s zone append",
+ md->disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
}
- if (sts != BLK_STS_OK)
- return DM_MAPIO_KILL;
+ ret = dm_revalidate_zones(md, t);
+ if (ret < 0)
+ return ret;
- return r;
+ if (!static_key_enabled(&zoned_enabled.key))
+ static_branch_enable(&zoned_enabled);
+ return 0;
}
/*
@@ -587,61 +292,17 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
struct mapped_device *md = io->md;
struct gendisk *disk = md->disk;
struct bio *orig_bio = io->orig_bio;
- unsigned int zwp_offset;
- unsigned int zno;
/*
- * For targets that do not emulate zone append, we only need to
- * handle native zone-append bios.
+ * Get the offset within the zone of the written sector
+ * and add that to the original bio sector position.
*/
- if (!dm_emulate_zone_append(md)) {
- /*
- * Get the offset within the zone of the written sector
- * and add that to the original bio sector position.
- */
- if (clone->bi_status == BLK_STS_OK &&
- bio_op(clone) == REQ_OP_ZONE_APPEND) {
- sector_t mask =
- (sector_t)bdev_zone_sectors(disk->part0) - 1;
-
- orig_bio->bi_iter.bi_sector +=
- clone->bi_iter.bi_sector & mask;
- }
-
- return;
- }
+ if (clone->bi_status == BLK_STS_OK &&
+ bio_op(clone) == REQ_OP_ZONE_APPEND) {
+ sector_t mask = bdev_zone_sectors(disk->part0) - 1;
- /*
- * For targets that do emulate zone append, if the clone BIO does not
- * own the target zone write lock, we have nothing to do.
- */
- if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
- return;
-
- zno = bio_zone_no(orig_bio);
-
- if (clone->bi_status != BLK_STS_OK) {
- /*
- * BIOs that modify a zone write pointer may leave the zone
- * in an unknown state in case of failure (e.g. the write
- * pointer was only partially advanced). In this case, set
- * the target zone write pointer as invalid unless it is
- * already being updated.
- */
- WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
- } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
- /*
- * Get the written sector for zone append operation that were
- * emulated using regular write operations.
- */
- zwp_offset = READ_ONCE(md->zwp_offset[zno]);
- if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
- WRITE_ONCE(md->zwp_offset[zno],
- DM_ZONE_INVALID_WP_OFST);
- else
- orig_bio->bi_iter.bi_sector +=
- zwp_offset - bio_sectors(orig_bio);
+ orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask;
}
- dm_zone_unlock(disk, zno, clone);
+ return;
}
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 621794a9edd6..12236e6f46f3 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1001,7 +1001,6 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->discard_alignment = 0;
limits->discard_granularity = DMZ_BLOCK_SIZE;
- limits->max_discard_sectors = chunk_sectors;
limits->max_hw_discard_sectors = chunk_sectors;
limits->max_write_zeroes_sectors = chunk_sectors;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 56aa2a8b9d71..13037d6a6f62 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -765,7 +765,7 @@ static struct table_device *open_table_device(struct mapped_device *md,
return td;
out_blkdev_put:
- fput(bdev_file);
+ __fput_sync(bdev_file);
out_free_td:
kfree(td);
return ERR_PTR(r);
@@ -778,7 +778,13 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
{
if (md->disk->slave_dir)
bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
- fput(td->dm_dev.bdev_file);
+
+ /* Leverage async fput() if DMF_DEFERRED_REMOVE set */
+ if (unlikely(test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
+ fput(td->dm_dev.bdev_file);
+ else
+ __fput_sync(td->dm_dev.bdev_file);
+
put_dax(td->dm_dev.dax_dev);
list_del(&td->list);
kfree(td);
@@ -1080,7 +1086,7 @@ void disable_discard(struct mapped_device *md)
struct queue_limits *limits = dm_get_queue_limits(md);
/* device doesn't really support DISCARD, disable it */
- limits->max_discard_sectors = 0;
+ limits->max_hw_discard_sectors = 0;
}
void disable_write_zeroes(struct mapped_device *md)
@@ -1422,25 +1428,12 @@ static void __map_bio(struct bio *clone)
down(&md->swap_bios_semaphore);
}
- if (static_branch_unlikely(&zoned_enabled)) {
- /*
- * Check if the IO needs a special mapping due to zone append
- * emulation on zoned target. In this case, dm_zone_map_bio()
- * calls the target map operation.
- */
- if (unlikely(dm_emulate_zone_append(md)))
- r = dm_zone_map_bio(tio);
- else
- goto do_map;
- } else {
-do_map:
- if (likely(ti->type->map == linear_map))
- r = linear_map(ti, clone);
- else if (ti->type->map == stripe_map)
- r = stripe_map(ti, clone);
- else
- r = ti->type->map(ti, clone);
- }
+ if (likely(ti->type->map == linear_map))
+ r = linear_map(ti, clone);
+ else if (ti->type->map == stripe_map)
+ r = stripe_map(ti, clone);
+ else
+ r = ti->type->map(ti, clone);
switch (r) {
case DM_MAPIO_SUBMITTED:
@@ -1768,6 +1761,33 @@ static void init_clone_info(struct clone_info *ci, struct dm_io *io,
ci->sector_count = 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
+ struct bio *bio)
+{
+ /*
+ * For mapped device that need zone append emulation, we must
+ * split any large BIO that straddles zone boundaries.
+ */
+ return dm_emulate_zone_append(md) && bio_straddles_zones(bio) &&
+ !bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
+}
+static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
+{
+ return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
+}
+#else
+static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
+ struct bio *bio)
+{
+ return false;
+}
+static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
+{
+ return false;
+}
+#endif
+
/*
* Entry point to split a bio into clones and submit them to the targets.
*/
@@ -1777,19 +1797,32 @@ static void dm_split_and_process_bio(struct mapped_device *md,
struct clone_info ci;
struct dm_io *io;
blk_status_t error = BLK_STS_OK;
- bool is_abnormal;
+ bool is_abnormal, need_split;
- is_abnormal = is_abnormal_io(bio);
- if (unlikely(is_abnormal)) {
+ need_split = is_abnormal = is_abnormal_io(bio);
+ if (static_branch_unlikely(&zoned_enabled))
+ need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
+
+ if (unlikely(need_split)) {
/*
* Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
* otherwise associated queue_limits won't be imposed.
+ * Also split the BIO for mapped devices needing zone append
+ * emulation to ensure that the BIO does not cross zone
+ * boundaries.
*/
bio = bio_split_to_limits(bio);
if (!bio)
return;
}
+ /*
+ * Use the block layer zone write plugging for mapped devices that
+ * need zone append emulation (e.g. dm-crypt).
+ */
+ if (static_branch_unlikely(&zoned_enabled) && dm_zone_plug_bio(md, bio))
+ return;
+
/* Only support nowait for normal IO */
if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
io = alloc_io(md, bio, GFP_NOWAIT);
@@ -2010,7 +2043,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
md->dax_dev = NULL;
}
- dm_cleanup_zoned_dev(md);
if (md->disk) {
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 7f1acbf6bd9e..53ef8207fe2c 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -101,16 +101,15 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
/*
* Zoned targets related functions.
*/
-int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
+ struct queue_limits *lim);
void dm_zone_endio(struct dm_io *io, struct bio *clone);
#ifdef CONFIG_BLK_DEV_ZONED
-void dm_cleanup_zoned_dev(struct mapped_device *md);
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
#else
-static inline void dm_cleanup_zoned_dev(struct mapped_device *md) {}
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 059afc24c08b..0a2d37eb38ef 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1424,7 +1424,7 @@ __acquires(bitmap->lock)
sector_t chunk = offset >> bitmap->chunkshift;
unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
- sector_t csize;
+ sector_t csize = ((sector_t)1) << bitmap->chunkshift;
int err;
if (page >= bitmap->pages) {
@@ -1433,6 +1433,7 @@ __acquires(bitmap->lock)
* End-of-device while looking for a whole page or
* user set a huge number to sysfs bitmap_set_bits.
*/
+ *blocks = csize - (offset & (csize - 1));
return NULL;
}
err = md_bitmap_checkpage(bitmap, page, create, 0);
@@ -1441,8 +1442,7 @@ __acquires(bitmap->lock)
bitmap->bp[page].map == NULL)
csize = ((sector_t)1) << (bitmap->chunkshift +
PAGE_COUNTER_SHIFT);
- else
- csize = ((sector_t)1) << bitmap->chunkshift;
+
*blocks = csize - (offset & (csize - 1));
if (err < 0)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e575e74aabf5..aff9118ff697 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8087,7 +8087,8 @@ void md_wakeup_thread(struct md_thread __rcu *thread)
if (t) {
pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
set_bit(THREAD_WAKEUP, &t->flags);
- wake_up(&t->wqueue);
+ if (wq_has_sleeper(&t->wqueue))
+ wake_up(&t->wqueue);
}
rcu_read_unlock();
}
@@ -8582,6 +8583,10 @@ static int is_mddev_idle(struct mddev *mddev, int init)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_disk;
+
+ if (!init && !blk_queue_io_stat(disk->queue))
+ continue;
+
curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 097d9dbd69b8..ca085ecad504 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -621,7 +621,8 @@ extern void mddev_unlock(struct mddev *mddev);
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
- atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
+ if (blk_queue_io_stat(bdev->bd_disk->queue))
+ atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
}
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d874abfc1836..2bd1ce9b3922 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,7 +36,6 @@
*/
#include <linux/blkdev.h>
-#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
@@ -6734,6 +6733,9 @@ static void raid5d(struct md_thread *thread)
int batch_size, released;
unsigned int offset;
+ if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ break;
+
released = release_stripe_list(conf, conf->temp_inactive_list);
if (released)
clear_bit(R5_DID_ALLOC, &conf->cache_state);
@@ -6770,18 +6772,7 @@ static void raid5d(struct md_thread *thread)
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
-
- /*
- * Waiting on MD_SB_CHANGE_PENDING below may deadlock
- * seeing md_check_recovery() is needed to clear
- * the flag when using mdmon.
- */
- continue;
}
-
- wait_event_lock_irq(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
- conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);