summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/alloc.c18
-rw-r--r--drivers/md/bcache/btree.c255
-rw-r--r--drivers/md/bcache/btree.h84
-rw-r--r--drivers/md/bcache/request.c7
-rw-r--r--drivers/md/bcache/request.h3
-rw-r--r--drivers/md/bcache/super.c11
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/bcache/writeback.c164
-rw-r--r--drivers/md/bcache/writeback.h19
-rw-r--r--drivers/md/dm-bio-record.h15
-rw-r--r--drivers/md/dm-cache-target.c6
-rw-r--r--drivers/md/dm-integrity.c84
-rw-r--r--drivers/md/dm-mpath.c2
-rw-r--r--drivers/md/dm-thin-metadata.c2
-rw-r--r--drivers/md/dm-verity-target.c2
-rw-r--r--drivers/md/dm-writecache.c16
-rw-r--r--drivers/md/dm-zoned-target.c10
-rw-r--r--drivers/md/dm.c32
-rw-r--r--drivers/md/md.c11
19 files changed, 552 insertions, 191 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8bc1faf71ff2..a1df0d95151c 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -67,7 +67,6 @@
#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/random.h>
-#include <linux/sched/signal.h>
#include <trace/events/bcache.h>
#define MAX_OPEN_BUCKETS 128
@@ -734,21 +733,8 @@ int bch_open_buckets_alloc(struct cache_set *c)
int bch_cache_allocator_start(struct cache *ca)
{
- struct task_struct *k;
-
- /*
- * In case previous btree check operation occupies too many
- * system memory for bcache btree node cache, and the
- * registering process is selected by OOM killer. Here just
- * ignore the SIGKILL sent by OOM killer if there is, to
- * avoid kthread_run() being failed by pending signals. The
- * bcache registering process will exit after the registration
- * done.
- */
- if (signal_pending(current))
- flush_signals(current);
-
- k = kthread_run(bch_allocator_thread, ca, "bcache_allocator");
+ struct task_struct *k = kthread_run(bch_allocator_thread,
+ ca, "bcache_allocator");
if (IS_ERR(k))
return PTR_ERR(k);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index b12186c87f52..72856e5f23a3 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -34,7 +34,6 @@
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
-#include <linux/sched/signal.h>
#include <linux/rculist.h>
#include <linux/delay.h>
#include <trace/events/bcache.h>
@@ -102,64 +101,6 @@
#define insert_lock(s, b) ((b)->level <= (s)->lock)
-/*
- * These macros are for recursing down the btree - they handle the details of
- * locking and looking up nodes in the cache for you. They're best treated as
- * mere syntax when reading code that uses them.
- *
- * op->lock determines whether we take a read or a write lock at a given depth.
- * If you've got a read lock and find that you need a write lock (i.e. you're
- * going to have to split), set op->lock and return -EINTR; btree_root() will
- * call you again and you'll have the correct lock.
- */
-
-/**
- * btree - recurse down the btree on a specified key
- * @fn: function to call, which will be passed the child node
- * @key: key to recurse on
- * @b: parent btree node
- * @op: pointer to struct btree_op
- */
-#define btree(fn, key, b, op, ...) \
-({ \
- int _r, l = (b)->level - 1; \
- bool _w = l <= (op)->lock; \
- struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
- _w, b); \
- if (!IS_ERR(_child)) { \
- _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
- rw_unlock(_w, _child); \
- } else \
- _r = PTR_ERR(_child); \
- _r; \
-})
-
-/**
- * btree_root - call a function on the root of the btree
- * @fn: function to call, which will be passed the child node
- * @c: cache set
- * @op: pointer to struct btree_op
- */
-#define btree_root(fn, c, op, ...) \
-({ \
- int _r = -EINTR; \
- do { \
- struct btree *_b = (c)->root; \
- bool _w = insert_lock(op, _b); \
- rw_lock(_w, _b, _b->level); \
- if (_b == (c)->root && \
- _w == insert_lock(op, _b)) { \
- _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
- } \
- rw_unlock(_w, _b); \
- bch_cannibalize_unlock(c); \
- if (_r == -EINTR) \
- schedule(); \
- } while (_r == -EINTR); \
- \
- finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
- _r; \
-})
static inline struct bset *write_block(struct btree *b)
{
@@ -1849,7 +1790,7 @@ static void bch_btree_gc(struct cache_set *c)
/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
+ ret = bcache_btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
cond_resched();
@@ -1914,18 +1855,6 @@ static int bch_gc_thread(void *arg)
int bch_gc_thread_start(struct cache_set *c)
{
- /*
- * In case previous btree check operation occupies too many
- * system memory for bcache btree node cache, and the
- * registering process is selected by OOM killer. Here just
- * ignore the SIGKILL sent by OOM killer if there is, to
- * avoid kthread_run() being failed by pending signals. The
- * bcache registering process will exit after the registration
- * done.
- */
- if (signal_pending(current))
- flush_signals(current);
-
c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
return PTR_ERR_OR_ZERO(c->gc_thread);
}
@@ -1959,7 +1888,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
}
if (p)
- ret = btree(check_recurse, p, b, op);
+ ret = bcache_btree(check_recurse, p, b, op);
p = k;
} while (p && !ret);
@@ -1968,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
return ret;
}
+
+static int bch_btree_check_thread(void *arg)
+{
+ int ret;
+ struct btree_check_info *info = arg;
+ struct btree_check_state *check_state = info->state;
+ struct cache_set *c = check_state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i, n;
+
+ k = p = NULL;
+ i = n = 0;
+ cur_idx = prev_idx = 0;
+ ret = 0;
+
+ /* root node keys are checked before thread created */
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+ while (k) {
+ /*
+ * Fetch a root node key index, skip the keys which
+ * should be fetched by other threads, then check the
+ * sub-tree indexed by the fetched key.
+ */
+ spin_lock(&check_state->idx_lock);
+ cur_idx = check_state->key_idx;
+ check_state->key_idx++;
+ spin_unlock(&check_state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ /*
+ * No more keys to check in root node,
+ * current checking threads are enough,
+ * stop creating more.
+ */
+ atomic_set(&check_state->enough, 1);
+ /* Update check_state->enough earlier */
+ smp_mb__after_atomic();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ struct btree_op op;
+
+ btree_node_prefetch(c->root, p);
+ c->gc_stats.nodes++;
+ bch_btree_op_init(&op, 0);
+ ret = bcache_btree(check_recurse, p, c->root, &op);
+ if (ret)
+ goto out;
+ }
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ info->result = ret;
+ /* update check_state->started among all CPUs */
+ smp_mb__before_atomic();
+ if (atomic_dec_and_test(&check_state->started))
+ wake_up(&check_state->wait);
+
+ return ret;
+}
+
+
+
+static int bch_btree_chkthread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_BTR_CHKTHREAD_MAX)
+ n = BCH_BTR_CHKTHREAD_MAX;
+
+ return n;
+}
+
int bch_btree_check(struct cache_set *c)
{
- struct btree_op op;
+ int ret = 0;
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct btree_check_state *check_state;
+ char name[32];
- bch_btree_op_init(&op, SHRT_MAX);
+ /* check and mark root node keys */
+ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+ bch_initial_mark_key(c, c->root->level, k);
+
+ bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
+
+ if (c->root->level == 0)
+ return 0;
+
+ check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+ if (!check_state)
+ return -ENOMEM;
+
+ check_state->c = c;
+ check_state->total_threads = bch_btree_chkthread_nr();
+ check_state->key_idx = 0;
+ spin_lock_init(&check_state->idx_lock);
+ atomic_set(&check_state->started, 0);
+ atomic_set(&check_state->enough, 0);
+ init_waitqueue_head(&check_state->wait);
- return btree_root(check_recurse, c, &op);
+ /*
+ * Run multiple threads to check btree nodes in parallel,
+ * if check_state->enough is non-zero, it means current
+ * running check threads are enough, unncessary to create
+ * more.
+ */
+ for (i = 0; i < check_state->total_threads; i++) {
+ /* fetch latest check_state->enough earlier */
+ smp_mb__before_atomic();
+ if (atomic_read(&check_state->enough))
+ break;
+
+ check_state->infos[i].result = 0;
+ check_state->infos[i].state = check_state;
+ snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+ atomic_inc(&check_state->started);
+
+ check_state->infos[i].thread =
+ kthread_run(bch_btree_check_thread,
+ &check_state->infos[i],
+ name);
+ if (IS_ERR(check_state->infos[i].thread)) {
+ pr_err("fails to run thread bch_btrchk[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(check_state->infos[i].thread);
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(check_state->wait,
+ atomic_read(&check_state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+ for (i = 0; i < check_state->total_threads; i++) {
+ if (check_state->infos[i].result) {
+ ret = check_state->infos[i].result;
+ goto out;
+ }
+ }
+
+out:
+ kfree(check_state);
+ return ret;
}
void bch_initial_gc_finish(struct cache_set *c)
@@ -2414,7 +2506,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad))) {
- ret = btree(map_nodes_recurse, k, b,
+ ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
from = NULL;
@@ -2432,10 +2524,10 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_nodes_fn *fn, int flags)
{
- return btree_root(map_nodes_recurse, c, op, from, fn, flags);
+ return bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags);
}
-static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
+int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
struct bkey *from, btree_map_keys_fn *fn,
int flags)
{
@@ -2448,7 +2540,8 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
- : btree(map_keys_recurse, k, b, op, from, fn, flags);
+ : bcache_btree(map_keys_recurse, k,
+ b, op, from, fn, flags);
from = NULL;
if (ret != MAP_CONTINUE)
@@ -2465,7 +2558,7 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags)
{
- return btree_root(map_keys_recurse, c, op, from, fn, flags);
+ return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags);
}
/* Keybuf code */
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index f4dcca449391..257969980c49 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -145,6 +145,9 @@ struct btree {
struct bio *bio;
};
+
+
+
#define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
@@ -216,6 +219,25 @@ struct btree_op {
unsigned int insert_collision:1;
};
+struct btree_check_state;
+struct btree_check_info {
+ struct btree_check_state *state;
+ struct task_struct *thread;
+ int result;
+};
+
+#define BCH_BTR_CHKTHREAD_MAX 64
+struct btree_check_state {
+ struct cache_set *c;
+ int total_threads;
+ int key_idx;
+ spinlock_t idx_lock;
+ atomic_t started;
+ atomic_t enough;
+ wait_queue_head_t wait;
+ struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
+};
+
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
memset(op, 0, sizeof(struct btree_op));
@@ -284,6 +306,65 @@ static inline void force_wake_up_gc(struct cache_set *c)
wake_up_gc(c);
}
+/*
+ * These macros are for recursing down the btree - they handle the details of
+ * locking and looking up nodes in the cache for you. They're best treated as
+ * mere syntax when reading code that uses them.
+ *
+ * op->lock determines whether we take a read or a write lock at a given depth.
+ * If you've got a read lock and find that you need a write lock (i.e. you're
+ * going to have to split), set op->lock and return -EINTR; btree_root() will
+ * call you again and you'll have the correct lock.
+ */
+
+/**
+ * btree - recurse down the btree on a specified key
+ * @fn: function to call, which will be passed the child node
+ * @key: key to recurse on
+ * @b: parent btree node
+ * @op: pointer to struct btree_op
+ */
+#define bcache_btree(fn, key, b, op, ...) \
+({ \
+ int _r, l = (b)->level - 1; \
+ bool _w = l <= (op)->lock; \
+ struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
+ _w, b); \
+ if (!IS_ERR(_child)) { \
+ _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
+ rw_unlock(_w, _child); \
+ } else \
+ _r = PTR_ERR(_child); \
+ _r; \
+})
+
+/**
+ * btree_root - call a function on the root of the btree
+ * @fn: function to call, which will be passed the child node
+ * @c: cache set
+ * @op: pointer to struct btree_op
+ */
+#define bcache_btree_root(fn, c, op, ...) \
+({ \
+ int _r = -EINTR; \
+ do { \
+ struct btree *_b = (c)->root; \
+ bool _w = insert_lock(op, _b); \
+ rw_lock(_w, _b, _b->level); \
+ if (_b == (c)->root && \
+ _w == insert_lock(op, _b)) { \
+ _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
+ } \
+ rw_unlock(_w, _b); \
+ bch_cannibalize_unlock(c); \
+ if (_r == -EINTR) \
+ schedule(); \
+ } while (_r == -EINTR); \
+ \
+ finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
+ _r; \
+})
+
#define MAP_DONE 0
#define MAP_CONTINUE 1
@@ -314,6 +395,9 @@ typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
struct bkey *k);
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags);
+int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
+ struct bkey *from, btree_map_keys_fn *fn,
+ int flags);
typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 820d8402a1dc..71a90fbec314 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1161,8 +1161,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
/* Cached devices - read & write stuff */
-static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct bio *bio)
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
struct bcache_device *d = bio->bi_disk->private_data;
@@ -1266,7 +1265,6 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
{
struct gendisk *g = dc->disk.disk;
- g->queue->make_request_fn = cached_dev_make_request;
g->queue->backing_dev_info->congested_fn = cached_dev_congested;
dc->disk.cache_miss = cached_dev_cache_miss;
dc->disk.ioctl = cached_dev_ioctl;
@@ -1301,8 +1299,7 @@ static void flash_dev_nodata(struct closure *cl)
continue_at(cl, search_free, NULL);
}
-static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bio *bio)
+blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
struct closure *cl;
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index c64dbd7a91aa..bb005c93dd72 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -37,7 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio);
+
void bch_flash_dev_request_init(struct bcache_device *d);
+blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio);
extern struct kmem_cache *bch_search_cache;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 0c3c5419c52b..d98354fa28e3 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -816,7 +816,7 @@ static void bcache_device_free(struct bcache_device *d)
}
static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
- sector_t sectors)
+ sector_t sectors, make_request_fn make_request_fn)
{
struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
@@ -866,11 +866,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->fops = &bcache_ops;
d->disk->private_data = d;
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE);
if (!q)
return -ENOMEM;
- blk_queue_make_request(q, NULL);
d->disk->queue = q;
q->queuedata = d;
q->backing_dev_info->congested_data = d;
@@ -1339,7 +1338,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
q->limits.raid_partial_stripes_expensive;
ret = bcache_device_init(&dc->disk, block_size,
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
+ cached_dev_make_request);
if (ret)
return ret;
@@ -1451,7 +1451,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- if (bcache_device_init(d, block_bytes(c), u->sectors))
+ if (bcache_device_init(d, block_bytes(c), u->sectors,
+ flash_dev_make_request))
goto err;
bcache_device_attach(d, c, u - c->uuids);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3470fae4eabc..323276994aab 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -154,7 +154,7 @@ static ssize_t bch_snprint_string_list(char *buf,
size_t i;
for (i = 0; list[i]; i++)
- out += snprintf(out, buf + size - out,
+ out += scnprintf(out, buf + size - out,
i == selected ? "[%s] " : "%s ", list[i]);
out[-1] = '\n';
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 4a40f9eadeaf..3f7641fb28d5 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -183,7 +183,7 @@ static void update_writeback_rate(struct work_struct *work)
*/
set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -193,7 +193,7 @@ static void update_writeback_rate(struct work_struct *work)
test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
return;
}
@@ -229,7 +229,7 @@ static void update_writeback_rate(struct work_struct *work)
*/
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
}
static unsigned int writeback_delay(struct cached_dev *dc,
@@ -785,7 +785,9 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
return MAP_CONTINUE;
}
-void bch_sectors_dirty_init(struct bcache_device *d)
+static int bch_root_node_dirty_init(struct cache_set *c,
+ struct bcache_device *d,
+ struct bkey *k)
{
struct sectors_dirty_init op;
int ret;
@@ -796,8 +798,13 @@ void bch_sectors_dirty_init(struct bcache_device *d)
op.start = KEY(op.inode, 0, 0);
do {
- ret = bch_btree_map_keys(&op.op, d->c, &op.start,
- sectors_dirty_init_fn, 0);
+ ret = bcache_btree(map_keys_recurse,
+ k,
+ c->root,
+ &op.op,
+ &op.start,
+ sectors_dirty_init_fn,
+ 0);
if (ret == -EAGAIN)
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
@@ -806,6 +813,151 @@ void bch_sectors_dirty_init(struct bcache_device *d)
break;
}
} while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static int bch_dirty_init_thread(void *arg)
+{
+ struct dirty_init_thrd_info *info = arg;
+ struct bch_dirty_init_state *state = info->state;
+ struct cache_set *c = state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i;
+
+ k = p = NULL;
+ i = 0;
+ cur_idx = prev_idx = 0;
+
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+
+ while (k) {
+ spin_lock(&state->idx_lock);
+ cur_idx = state->key_idx;
+ state->key_idx++;
+ spin_unlock(&state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ atomic_set(&state->enough, 1);
+ /* Update state->enough earlier */
+ smp_mb__after_atomic();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ if (bch_root_node_dirty_init(c, state->d, p) < 0)
+ goto out;
+ }
+
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ /* In order to wake up state->wait in time */
+ smp_mb__before_atomic();
+ if (atomic_dec_and_test(&state->started))
+ wake_up(&state->wait);
+
+ return 0;
+}
+
+static int bch_btre_dirty_init_thread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_DIRTY_INIT_THRD_MAX)
+ n = BCH_DIRTY_INIT_THRD_MAX;
+
+ return n;
+}
+
+void bch_sectors_dirty_init(struct bcache_device *d)
+{
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct sectors_dirty_init op;
+ struct cache_set *c = d->c;
+ struct bch_dirty_init_state *state;
+ char name[32];
+
+ /* Just count root keys if no leaf node */
+ if (c->root->level == 0) {
+ bch_btree_op_init(&op.op, -1);
+ op.inode = d->id;
+ op.count = 0;
+ op.start = KEY(op.inode, 0, 0);
+
+ for_each_key_filter(&c->root->keys,
+ k, &iter, bch_ptr_invalid)
+ sectors_dirty_init_fn(&op.op, c->root, k);
+ return;
+ }
+
+ state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
+ if (!state) {
+ pr_warn("sectors dirty init failed: cannot allocate memory");
+ return;
+ }
+
+ state->c = c;
+ state->d = d;
+ state->total_threads = bch_btre_dirty_init_thread_nr();
+ state->key_idx = 0;
+ spin_lock_init(&state->idx_lock);
+ atomic_set(&state->started, 0);
+ atomic_set(&state->enough, 0);
+ init_waitqueue_head(&state->wait);
+
+ for (i = 0; i < state->total_threads; i++) {
+ /* Fetch latest state->enough earlier */
+ smp_mb__before_atomic();
+ if (atomic_read(&state->enough))
+ break;
+
+ state->infos[i].state = state;
+ atomic_inc(&state->started);
+ snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
+
+ state->infos[i].thread =
+ kthread_run(bch_dirty_init_thread,
+ &state->infos[i],
+ name);
+ if (IS_ERR(state->infos[i].thread)) {
+ pr_err("fails to run thread bch_dirty_init[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(state->infos[i].thread);
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(state->wait,
+ atomic_read(&state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+out:
+ kfree(state);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 4e4c6810dc3c..b029843ce5b6 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,6 +16,7 @@
#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
+#define BCH_DIRTY_INIT_THRD_MAX 64
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up
@@ -23,6 +24,24 @@
*/
#define WRITEBACK_SHARE_SHIFT 14
+struct bch_dirty_init_state;
+struct dirty_init_thrd_info {
+ struct bch_dirty_init_state *state;
+ struct task_struct *thread;
+};
+
+struct bch_dirty_init_state {
+ struct cache_set *c;
+ struct bcache_device *d;
+ int total_threads;
+ int key_idx;
+ spinlock_t idx_lock;
+ atomic_t started;
+ atomic_t enough;
+ wait_queue_head_t wait;
+ struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
+};
+
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
{
uint64_t i, ret = 0;
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index c82578af56a5..2ea0360108e1 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -20,8 +20,13 @@
struct dm_bio_details {
struct gendisk *bi_disk;
u8 bi_partno;
+ int __bi_remaining;
unsigned long bi_flags;
struct bvec_iter bi_iter;
+ bio_end_io_t *bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ struct bio_integrity_payload *bi_integrity;
+#endif
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
@@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
bd->bi_partno = bio->bi_partno;
bd->bi_flags = bio->bi_flags;
bd->bi_iter = bio->bi_iter;
+ bd->__bi_remaining = atomic_read(&bio->__bi_remaining);
+ bd->bi_end_io = bio->bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ bd->bi_integrity = bio_integrity(bio);
+#endif
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
@@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
bio->bi_partno = bd->bi_partno;
bio->bi_flags = bd->bi_flags;
bio->bi_iter = bd->bi_iter;
+ atomic_set(&bio->__bi_remaining, bd->__bi_remaining);
+ bio->bi_end_io = bd->bi_end_io;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ bio->bi_integrity = bd->bi_integrity;
+#endif
}
#endif
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2d32821b3a5b..d3bb355819a4 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2846,8 +2846,8 @@ static void cache_postsuspend(struct dm_target *ti)
prevent_background_work(cache);
BUG_ON(atomic_read(&cache->nr_io_migrations));
- cancel_delayed_work(&cache->waker);
- flush_workqueue(cache->wq);
+ cancel_delayed_work_sync(&cache->waker);
+ drain_workqueue(cache->wq);
WARN_ON(cache->tracker.in_flight);
/*
@@ -3492,7 +3492,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {2, 1, 0},
+ .version = {2, 2, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index b225b3e445fa..2f03fecd312d 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -6,6 +6,8 @@
* This file is released under the GPL.
*/
+#include "dm-bio-record.h"
+
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/device-mapper.h>
@@ -201,17 +203,19 @@ struct dm_integrity_c {
__u8 log2_blocks_per_bitmap_bit;
unsigned char mode;
- int suspending;
int failed;
struct crypto_shash *internal_hash;
+ struct dm_target *ti;
+
/* these variables are locked with endio_wait.lock */
struct rb_root in_progress;
struct list_head wait_list;
wait_queue_head_t endio_wait;
struct workqueue_struct *wait_wq;
+ struct workqueue_struct *offload_wq;
unsigned char commit_seq;
commit_id_t commit_ids[N_COMMIT_IDS];
@@ -293,11 +297,7 @@ struct dm_integrity_io {
struct completion *completion;
- struct gendisk *orig_bi_disk;
- u8 orig_bi_partno;
- bio_end_io_t *orig_bi_end_io;
- struct bio_integrity_payload *orig_bi_integrity;
- struct bvec_iter orig_bi_iter;
+ struct dm_bio_details bio_details;
};
struct journal_completion {
@@ -1439,7 +1439,7 @@ static void dec_in_flight(struct dm_integrity_io *dio)
dio->range.logical_sector += dio->range.n_sectors;
bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->wait_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
return;
}
do_endio_flush(ic, dio);
@@ -1450,14 +1450,9 @@ static void integrity_end_io(struct bio *bio)
{
struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
- bio->bi_iter = dio->orig_bi_iter;
- bio->bi_disk = dio->orig_bi_disk;
- bio->bi_partno = dio->orig_bi_partno;
- if (dio->orig_bi_integrity) {
- bio->bi_integrity = dio->orig_bi_integrity;
+ dm_bio_restore(&dio->bio_details, bio);
+ if (bio->bi_integrity)
bio->bi_opf |= REQ_INTEGRITY;
- }
- bio->bi_end_io = dio->orig_bi_end_io;
if (dio->completion)
complete(dio->completion);
@@ -1542,7 +1537,7 @@ static void integrity_metadata(struct work_struct *w)
}
}
- __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
+ __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
unsigned pos;
char *mem, *checksums_ptr;
@@ -1586,7 +1581,7 @@ again:
if (likely(checksums != checksums_onstack))
kfree(checksums);
} else {
- struct bio_integrity_payload *bip = dio->orig_bi_integrity;
+ struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
if (bip) {
struct bio_vec biv;
@@ -1865,7 +1860,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
if (need_sync_io && from_map) {
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->metadata_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
return;
}
@@ -2005,20 +2000,13 @@ offload_to_thread:
} else
dio->completion = NULL;
- dio->orig_bi_iter = bio->bi_iter;
-
- dio->orig_bi_disk = bio->bi_disk;
- dio->orig_bi_partno = bio->bi_partno;
+ dm_bio_record(&dio->bio_details, bio);
bio_set_dev(bio, ic->dev->bdev);
-
- dio->orig_bi_integrity = bio_integrity(bio);
bio->bi_integrity = NULL;
bio->bi_opf &= ~REQ_INTEGRITY;
-
- dio->orig_bi_end_io = bio->bi_end_io;
bio->bi_end_io = integrity_end_io;
-
bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
+
generic_make_request(bio);
if (need_sync_io) {
@@ -2315,7 +2303,7 @@ static void integrity_writer(struct work_struct *w)
unsigned prev_free_sectors;
/* the following test is not needed, but it tests the replay code */
- if (READ_ONCE(ic->suspending) && !ic->meta_dev)
+ if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev)
return;
spin_lock_irq(&ic->endio_wait.lock);
@@ -2376,7 +2364,7 @@ static void integrity_recalc(struct work_struct *w)
next_chunk:
- if (unlikely(READ_ONCE(ic->suspending)))
+ if (unlikely(dm_suspended(ic->ti)))
goto unlock_ret;
range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
@@ -2501,7 +2489,7 @@ static void bitmap_block_work(struct work_struct *w)
dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
remove_range(ic, &dio->range);
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->wait_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
} else {
block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
dio->range.n_sectors, BITMAP_OP_SET);
@@ -2524,7 +2512,7 @@ static void bitmap_block_work(struct work_struct *w)
remove_range(ic, &dio->range);
INIT_WORK(&dio->work, integrity_bio_wait);
- queue_work(ic->wait_wq, &dio->work);
+ queue_work(ic->offload_wq, &dio->work);
}
queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
@@ -2804,8 +2792,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
del_timer_sync(&ic->autocommit_timer);
- WRITE_ONCE(ic->suspending, 1);
-
if (ic->recalc_wq)
drain_workqueue(ic->recalc_wq);
@@ -2834,8 +2820,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
#endif
}
- WRITE_ONCE(ic->suspending, 0);
-
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
ic->journal_uptodate = true;
@@ -2888,17 +2872,24 @@ static void dm_integrity_resume(struct dm_target *ti)
} else {
replay_journal(ic);
if (ic->mode == 'B') {
- int mode;
ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
if (unlikely(r))
dm_integrity_io_error(ic, "writing superblock", r);
- mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR;
- block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode);
- block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode);
- block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode);
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+ le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
+ block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
+ ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
+ }
rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
}
@@ -2967,7 +2958,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" meta_device:%s", ic->meta_dev->name);
if (ic->sectors_per_block != 1)
DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
- if (ic->recalculate_flag)
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
DMEMIT(" recalculate");
DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
@@ -3623,6 +3614,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
ti->private = ic;
ti->per_io_data_size = sizeof(struct dm_integrity_io);
+ ic->ti = ti;
ic->in_progress = RB_ROOT;
INIT_LIST_HEAD(&ic->wait_list);
@@ -3836,6 +3828,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
+ ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
+ METADATA_WORKQUEUE_MAX_ACTIVE);
+ if (!ic->offload_wq) {
+ ti->error = "Cannot allocate workqueue";
+ r = -ENOMEM;
+ goto bad;
+ }
+
ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
if (!ic->commit_wq) {
ti->error = "Cannot allocate workqueue";
@@ -4140,6 +4140,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
destroy_workqueue(ic->metadata_wq);
if (ic->wait_wq)
destroy_workqueue(ic->wait_wq);
+ if (ic->offload_wq)
+ destroy_workqueue(ic->offload_wq);
if (ic->commit_wq)
destroy_workqueue(ic->commit_wq);
if (ic->writer_wq)
@@ -4200,7 +4202,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2bc18c9c3abc..58fd137b6ae1 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -2053,7 +2053,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 13, 0},
+ .version = {1, 14, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index fc9947d6210c..76b6b323bf4b 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -960,9 +960,9 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
DMWARN("%s: __commit_transaction() failed, error = %d",
__func__, r);
}
+ pmd_write_unlock(pmd);
if (!pmd->fail_io)
__destroy_persistent_data_objects(pmd);
- pmd_write_unlock(pmd);
kfree(pmd);
return 0;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 0d61e9c67986..eec9f252e935 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1221,7 +1221,7 @@ bad:
static struct target_type verity_target = {
.name = "verity",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index b9e27e37a943..a09bdc000e64 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -625,6 +625,12 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry
wc->freelist_size++;
}
+static inline void writecache_verify_watermark(struct dm_writecache *wc)
+{
+ if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
+ queue_work(wc->writeback_wq, &wc->writeback_work);
+}
+
static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
{
struct wc_entry *e;
@@ -650,8 +656,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, s
list_del(&e->lru);
}
wc->freelist_size--;
- if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
- queue_work(wc->writeback_wq, &wc->writeback_work);
+
+ writecache_verify_watermark(wc);
return e;
}
@@ -842,7 +848,7 @@ static void writecache_suspend(struct dm_target *ti)
}
wc_unlock(wc);
- flush_workqueue(wc->writeback_wq);
+ drain_workqueue(wc->writeback_wq);
wc_lock(wc);
if (flush_on_suspend)
@@ -965,6 +971,8 @@ erase_this:
writecache_commit_flushed(wc, false);
}
+ writecache_verify_watermark(wc);
+
wc_unlock(wc);
}
@@ -2312,7 +2320,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
static struct target_type writecache_target = {
.name = "writecache",
- .version = {1, 1, 1},
+ .version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = writecache_ctr,
.dtr = writecache_dtr,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 70a1063161c0..f4f83d39b3dc 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -533,8 +533,9 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
/* Get the BIO chunk work. If one is not active yet, create one */
cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
- if (!cw) {
-
+ if (cw) {
+ dmz_get_chunk_work(cw);
+ } else {
/* Create a new chunk work */
cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
if (unlikely(!cw)) {
@@ -543,7 +544,7 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
}
INIT_WORK(&cw->work, dmz_chunk_work);
- refcount_set(&cw->refcount, 0);
+ refcount_set(&cw->refcount, 1);
cw->target = dmz;
cw->chunk = chunk;
bio_list_init(&cw->bio_list);
@@ -556,7 +557,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
}
bio_list_add(&cw->bio_list, bio);
- dmz_get_chunk_work(cw);
dmz_reclaim_bio_acc(dmz->reclaim);
if (queue_work(dmz->chunk_wq, &cw->work))
@@ -967,7 +967,7 @@ static int dmz_iterate_devices(struct dm_target *ti,
static struct target_type dmz_type = {
.name = "zoned",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = dmz_ctr,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b89f07ee2eff..753302e83910 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -25,6 +25,7 @@
#include <linux/wait.h>
#include <linux/pr.h>
#include <linux/refcount.h>
+#include <linux/part_stat.h>
#define DM_MSG_PREFIX "core"
@@ -1788,7 +1789,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
* With request-based DM we only need to check the
* top-level queue for congestion.
*/
- r = md->queue->backing_dev_info->wb.state & bdi_bits;
+ struct backing_dev_info *bdi = md->queue->backing_dev_info;
+ r = bdi->wb.congested->state & bdi_bits;
} else {
map = dm_get_live_table_fast(md);
if (map)
@@ -1854,15 +1856,6 @@ static const struct dax_operations dm_dax_ops;
static void dm_wq_work(struct work_struct *work);
-static void dm_init_normal_md_queue(struct mapped_device *md)
-{
- /*
- * Initialize aspects of queue that aren't relevant for blk-mq
- */
- md->queue->backing_dev_info->congested_data = md;
- md->queue->backing_dev_info->congested_fn = dm_any_congested;
-}
-
static void cleanup_mapped_device(struct mapped_device *md)
{
if (md->wq)
@@ -1946,16 +1939,15 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
- if (!md->queue)
- goto bad;
- md->queue->queuedata = md;
/*
* default to bio-based required ->make_request_fn until DM
* table is loaded and md->type established. If request-based
* table is loaded: blk-mq will override accordingly.
*/
- blk_queue_make_request(md->queue, dm_make_request);
+ md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+ if (!md->queue)
+ goto bad;
+ md->queue->queuedata = md;
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2249,6 +2241,12 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+static void dm_init_congested_fn(struct mapped_device *md)
+{
+ md->queue->backing_dev_info->congested_data = md;
+ md->queue->backing_dev_info->congested_fn = dm_any_congested;
+}
+
/*
* Setup the DM device's queue based on md's type
*/
@@ -2265,11 +2263,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
return r;
}
+ dm_init_congested_fn(md);
break;
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
- dm_init_normal_md_queue(md);
+ dm_init_congested_fn(md);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
@@ -2368,6 +2367,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
map = dm_get_live_table(md, &srcu_idx);
if (!dm_suspended_md(md)) {
dm_table_presuspend_targets(map);
+ set_bit(DMF_SUSPENDED, &md->flags);
dm_table_postsuspend_targets(map);
}
/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 469f551863be..271e8a587354 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -58,8 +58,10 @@
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
+#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
+#include <linux/part_stat.h>
#include <trace/events/block.h>
#include "md.h"
@@ -2491,12 +2493,12 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
int err = 0;
struct block_device *bdev;
- char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
shared ? (struct md_rdev *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
- pr_warn("md: could not open %s.\n", __bdevname(dev, b));
+ pr_warn("md: could not open device unknown-block(%u,%u).\n",
+ MAJOR(dev), MINOR(dev));
return PTR_ERR(bdev);
}
rdev->bdev = bdev;
@@ -5621,12 +5623,11 @@ static int md_alloc(dev_t dev, char *name)
mddev->hold_active = UNTIL_STOP;
error = -ENOMEM;
- mddev->queue = blk_alloc_queue(GFP_KERNEL);
+ mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
mddev->queue->queuedata = mddev;
- blk_queue_make_request(mddev->queue, md_make_request);
blk_set_stacking_limits(&mddev->queue->limits);
disk = alloc_disk(1 << shift);
@@ -6184,7 +6185,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
md_bitmap_wait_behind_writes(mddev);
- if (mddev->pers && mddev->pers->quiesce) {
+ if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}