Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/super.c                   |  22
-rw-r--r--  drivers/md/dm-crypt.c                       |  53
-rw-r--r--  drivers/md/dm-flakey.c                      |   9
-rw-r--r--  drivers/md/dm-ima.c                         |  42
-rw-r--r--  drivers/md/dm-integrity.c                   |  12
-rw-r--r--  drivers/md/dm-linear.c                      |   2
-rw-r--r--  drivers/md/dm-log-writes.c                  |   2
-rw-r--r--  drivers/md/dm-path-selector.c               |   8
-rw-r--r--  drivers/md/dm-path-selector.h               |   2
-rw-r--r--  drivers/md/dm-ps-historical-service-time.c  |   9
-rw-r--r--  drivers/md/dm-ps-io-affinity.c              |   5
-rw-r--r--  drivers/md/dm-ps-queue-length.c             |   9
-rw-r--r--  drivers/md/dm-ps-round-robin.c              |   9
-rw-r--r--  drivers/md/dm-ps-service-time.c             |   9
-rw-r--r--  drivers/md/dm-raid.c                        |   7
-rw-r--r--  drivers/md/dm-stripe.c                      |   3
-rw-r--r--  drivers/md/dm-table.c                       |  14
-rw-r--r--  drivers/md/dm-target.c                      |   2
-rw-r--r--  drivers/md/dm-thin.c                        |   7
-rw-r--r--  drivers/md/dm-vdo/funnel-workqueue.c        |   3
-rw-r--r--  drivers/md/dm-verity-fec.c                  |   4
-rw-r--r--  drivers/md/dm-verity-target.c               | 185
-rw-r--r--  drivers/md/dm-verity.h                      |  22
-rw-r--r--  drivers/md/dm-writecache.c                  |  11
-rw-r--r--  drivers/md/dm-zone.c                        |   2
-rw-r--r--  drivers/md/dm-zoned-target.c                |   2
-rw-r--r--  drivers/md/dm.c                             |  67
-rw-r--r--  drivers/md/md.c                             |  73
-rw-r--r--  drivers/md/md.h                             |  26
-rw-r--r--  drivers/md/raid0.c                          |   1
-rw-r--r--  drivers/md/raid10.c                         |   4
-rw-r--r--  drivers/md/raid5.c                          |   2
32 files changed, 282 insertions(+), 346 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 2ea490b9d370..1492c8552255 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,14 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
{
const char *err;
struct cache_sb_disk *s;
- struct page *page;
+ struct folio *folio;
unsigned int i;
- page = read_cache_page_gfp(bdev->bd_mapping,
- SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
- if (IS_ERR(page))
+ folio = mapping_read_folio_gfp(bdev->bd_mapping,
+ SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+ if (IS_ERR(folio))
return "IO error";
- s = page_address(page) + offset_in_page(SB_OFFSET);
+ s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
sb->offset = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -272,7 +272,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
*res = s;
return NULL;
err:
- put_page(page);
+ folio_put(folio);
return err;
}
@@ -1366,7 +1366,7 @@ static CLOSURE_CALLBACK(cached_dev_free)
mutex_unlock(&bch_register_lock);
if (dc->sb_disk)
- put_page(virt_to_page(dc->sb_disk));
+ folio_put(virt_to_folio(dc->sb_disk));
if (dc->bdev_file)
fput(dc->bdev_file);
@@ -2216,7 +2216,7 @@ void bch_cache_release(struct kobject *kobj)
free_fifo(&ca->free[i]);
if (ca->sb_disk)
- put_page(virt_to_page(ca->sb_disk));
+ folio_put(virt_to_folio(ca->sb_disk));
if (ca->bdev_file)
fput(ca->bdev_file);
@@ -2593,7 +2593,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!holder) {
ret = -ENOMEM;
err = "cannot allocate memory";
- goto out_put_sb_page;
+ goto out_put_sb_folio;
}
/* Now reopen in exclusive mode with proper holder */
@@ -2667,8 +2667,8 @@ async_done:
out_free_holder:
kfree(holder);
-out_put_sb_page:
- put_page(virt_to_page(sb_disk));
+out_put_sb_folio:
+ folio_put(virt_to_folio(sb_disk));
out_blkdev_put:
if (bdev_file)
fput(bdev_file);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 17157c4216a5..5ef43231fe77 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -253,17 +253,35 @@ MODULE_PARM_DESC(max_read_size, "Maximum size of a read request");
static unsigned int max_write_size = 0;
module_param(max_write_size, uint, 0644);
MODULE_PARM_DESC(max_write_size, "Maximum size of a write request");
-static unsigned get_max_request_size(struct crypt_config *cc, bool wrt)
+
+static unsigned get_max_request_sectors(struct dm_target *ti, struct bio *bio)
{
+ struct crypt_config *cc = ti->private;
unsigned val, sector_align;
- val = !wrt ? READ_ONCE(max_read_size) : READ_ONCE(max_write_size);
- if (likely(!val))
- val = !wrt ? DM_CRYPT_DEFAULT_MAX_READ_SIZE : DM_CRYPT_DEFAULT_MAX_WRITE_SIZE;
- if (wrt || cc->used_tag_size) {
- if (unlikely(val > BIO_MAX_VECS << PAGE_SHIFT))
- val = BIO_MAX_VECS << PAGE_SHIFT;
- }
- sector_align = max(bdev_logical_block_size(cc->dev->bdev), (unsigned)cc->sector_size);
+ bool wrt = op_is_write(bio_op(bio));
+
+ if (wrt) {
+ /*
+ * For zoned devices, splitting write operations creates the
+ * risk of deadlocking queue freeze operations with zone write
+ * plugging BIO work when the remainder of a split BIO is
+ * issued. So always allow the entire BIO to proceed.
+ */
+ if (ti->emulate_zone_append)
+ return bio_sectors(bio);
+
+ val = min_not_zero(READ_ONCE(max_write_size),
+ DM_CRYPT_DEFAULT_MAX_WRITE_SIZE);
+ } else {
+ val = min_not_zero(READ_ONCE(max_read_size),
+ DM_CRYPT_DEFAULT_MAX_READ_SIZE);
+ }
+
+ if (wrt || cc->used_tag_size)
+ val = min(val, BIO_MAX_VECS << PAGE_SHIFT);
+
+ sector_align = max(bdev_logical_block_size(cc->dev->bdev),
+ (unsigned)cc->sector_size);
val = round_down(val, sector_align);
if (unlikely(!val))
val = sector_align;
@@ -1192,11 +1210,11 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
return -EINVAL;
}
- if (bi->tuple_size < cc->used_tag_size) {
+ if (bi->metadata_size < cc->used_tag_size) {
ti->error = "Integrity profile tag size mismatch.";
return -EINVAL;
}
- cc->tuple_size = bi->tuple_size;
+ cc->tuple_size = bi->metadata_size;
if (1 << bi->interval_exp != cc->sector_size) {
ti->error = "Integrity profile sector size mismatch.";
return -EINVAL;
@@ -3496,7 +3514,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
/*
* Check if bio is too large, split as needed.
*/
- max_sectors = get_max_request_size(cc, bio_data_dir(bio) == WRITE);
+ max_sectors = get_max_request_sectors(ti, bio);
if (unlikely(bio_sectors(bio) > max_sectors))
dm_accept_partial_bio(bio, max_sectors);
@@ -3733,6 +3751,17 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
max_t(unsigned int, limits->physical_block_size, cc->sector_size);
limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
limits->dma_alignment = limits->logical_block_size - 1;
+
+ /*
+ * For zoned dm-crypt targets, there will be no internal splitting of
+ * write BIOs to avoid exceeding BIO_MAX_VECS vectors per BIO. But
+ * without respecting this limit, crypt_alloc_buffer() will trigger a
+ * BUG(). Avoid this by forcing DM core to split write BIOs to this
+ * limit.
+ */
+ if (ti->emulate_zone_append)
+ limits->max_hw_sectors = min(limits->max_hw_sectors,
+ BIO_MAX_VECS << PAGE_SECTORS_SHIFT);
}
static struct target_type crypt_target = {
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index c711db6f8f5c..cf17fd46e255 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -215,16 +215,19 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
}
if (test_bit(DROP_WRITES, &fc->flags) &&
- (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) {
+ ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) ||
+ fc->random_write_corrupt)) {
ti->error = "drop_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
} else if (test_bit(ERROR_WRITES, &fc->flags) &&
- (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) {
+ ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) ||
+ fc->random_write_corrupt)) {
ti->error = "error_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
} else if (test_bit(ERROR_READS, &fc->flags) &&
- (fc->corrupt_bio_rw == READ || fc->random_read_corrupt)) {
+ ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == READ) ||
+ fc->random_read_corrupt)) {
ti->error = "error_reads is incompatible with random_read_corrupt or corrupt_bio_byte with the READ flag set";
return -EINVAL;
}
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index b90f34259fbb..8b50c908c6f4 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -241,10 +241,11 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
/*
* First retrieve the target metadata.
*/
- scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN,
- "target_index=%d,target_begin=%llu,target_len=%llu,",
- i, ti->begin, ti->len);
- target_metadata_buf_len = strlen(target_metadata_buf);
+ target_metadata_buf_len =
+ scnprintf(target_metadata_buf,
+ DM_IMA_TARGET_METADATA_BUF_LEN,
+ "target_index=%d,target_begin=%llu,target_len=%llu,",
+ i, ti->begin, ti->len);
/*
* Then retrieve the actual target data.
@@ -448,11 +449,9 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
if (r)
goto error;
- scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
- "%sname=%s,uuid=%s;device_resume=no_data;",
- DM_IMA_VERSION_STR, dev_name, dev_uuid);
- l = strlen(device_table_data);
-
+ l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+ "%sname=%s,uuid=%s;device_resume=no_data;",
+ DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
capacity_len = strlen(capacity_str);
@@ -561,10 +560,9 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all)
if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
goto error;
- scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
- "%sname=%s,uuid=%s;device_remove=no_data;",
- DM_IMA_VERSION_STR, dev_name, dev_uuid);
- l = strlen(device_table_data);
+ l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+ "%sname=%s,uuid=%s;device_remove=no_data;",
+ DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
memcpy(device_table_data + l, remove_all_str, remove_all_len);
@@ -647,10 +645,9 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map)
if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
goto error2;
- scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
- "%sname=%s,uuid=%s;table_clear=no_data;",
- DM_IMA_VERSION_STR, dev_name, dev_uuid);
- l = strlen(device_table_data);
+ l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+ "%sname=%s,uuid=%s;table_clear=no_data;",
+ DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
capacity_len = strlen(capacity_str);
@@ -706,7 +703,7 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md)
char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL;
char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL;
bool noio = true;
- int r;
+ int r, len;
if (dm_ima_alloc_and_copy_device_data(md, &new_device_data,
md->ima.active_table.num_targets, noio))
@@ -728,12 +725,11 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md)
md->ima.active_table.device_metadata = new_device_data;
md->ima.active_table.device_metadata_len = strlen(new_device_data);
- scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2,
- "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data,
- new_dev_name, new_dev_uuid, capacity_str);
+ len = scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2,
+ "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data,
+ new_dev_name, new_dev_uuid, capacity_str);
- dm_ima_measure_data("dm_device_rename", combined_device_data, strlen(combined_device_data),
- noio);
+ dm_ima_measure_data("dm_device_rename", combined_device_data, len, noio);
goto exit;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 4395657fa583..efeee0a873c0 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3906,8 +3906,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
struct blk_integrity *bi = &limits->integrity;
memset(bi, 0, sizeof(*bi));
- bi->tuple_size = ic->tag_size;
- bi->tag_size = bi->tuple_size;
+ bi->metadata_size = ic->tag_size;
+ bi->tag_size = bi->metadata_size;
bi->interval_exp =
ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
}
@@ -4746,18 +4746,18 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
ti->error = "Integrity profile not supported";
goto bad;
}
- /*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/
- if (bi->tuple_size < ic->tag_size) {
+ /*printk("tag_size: %u, metadata_size: %u\n", bi->tag_size, bi->metadata_size);*/
+ if (bi->metadata_size < ic->tag_size) {
r = -EINVAL;
ti->error = "The integrity profile is smaller than tag size";
goto bad;
}
- if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) {
+ if ((unsigned long)bi->metadata_size > PAGE_SIZE / 2) {
r = -EINVAL;
ti->error = "Too big tuple size";
goto bad;
}
- ic->tuple_size = bi->tuple_size;
+ ic->tuple_size = bi->metadata_size;
if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) {
r = -EINVAL;
ti->error = "Integrity profile sector size mismatch";
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 15538ec58f8e..73bf290af181 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -170,7 +170,7 @@ static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
- pfn_t *pfn)
+ unsigned long *pfn)
{
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index d484e8e1d48a..679b07dee229 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -893,7 +893,7 @@ static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
- pfn_t *pfn)
+ unsigned long *pfn)
{
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index 3e4cb81ce512..d0b883fabfeb 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -117,16 +117,16 @@ int dm_register_path_selector(struct path_selector_type *pst)
}
EXPORT_SYMBOL_GPL(dm_register_path_selector);
-int dm_unregister_path_selector(struct path_selector_type *pst)
+void dm_unregister_path_selector(struct path_selector_type *pst)
{
struct ps_internal *psi;
down_write(&_ps_lock);
psi = __find_path_selector_type(pst->name);
- if (!psi) {
+ if (WARN_ON(!psi)) {
up_write(&_ps_lock);
- return -EINVAL;
+ return;
}
list_del(&psi->list);
@@ -134,7 +134,5 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
up_write(&_ps_lock);
kfree(psi);
-
- return 0;
}
EXPORT_SYMBOL_GPL(dm_unregister_path_selector);
diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h
index 3861b2d8b963..7b2270532e64 100644
--- a/drivers/md/dm-path-selector.h
+++ b/drivers/md/dm-path-selector.h
@@ -96,7 +96,7 @@ struct path_selector_type {
int dm_register_path_selector(struct path_selector_type *type);
/* Unregister a path selector */
-int dm_unregister_path_selector(struct path_selector_type *type);
+void dm_unregister_path_selector(struct path_selector_type *type);
/* Returns a registered path selector type */
struct path_selector_type *dm_get_path_selector(const char *name);
diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index b49e10d76d03..f07e773d9cc0 100644
--- a/drivers/md/dm-ps-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
@@ -541,8 +541,10 @@ static int __init dm_hst_init(void)
{
int r = dm_register_path_selector(&hst_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " HST_VERSION " loaded");
@@ -551,10 +553,7 @@ static int __init dm_hst_init(void)
static void __exit dm_hst_exit(void)
{
- int r = dm_unregister_path_selector(&hst_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&hst_ps);
}
module_init(dm_hst_init);
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index 716807e511ee..80415a045c68 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -260,10 +260,7 @@ static int __init dm_ioa_init(void)
static void __exit dm_ioa_exit(void)
{
- int ret = dm_unregister_path_selector(&ioa_ps);
-
- if (ret < 0)
- DMERR("unregister failed %d", ret);
+ dm_unregister_path_selector(&ioa_ps);
}
module_init(dm_ioa_init);
diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c
index e305f05ad1e5..9c68701ed7a4 100644
--- a/drivers/md/dm-ps-queue-length.c
+++ b/drivers/md/dm-ps-queue-length.c
@@ -260,8 +260,10 @@ static int __init dm_ql_init(void)
{
int r = dm_register_path_selector(&ql_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " QL_VERSION " loaded");
@@ -270,10 +272,7 @@ static int __init dm_ql_init(void)
static void __exit dm_ql_exit(void)
{
- int r = dm_unregister_path_selector(&ql_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&ql_ps);
}
module_init(dm_ql_init);
diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c
index d1745b123dc1..0c12f4073461 100644
--- a/drivers/md/dm-ps-round-robin.c
+++ b/drivers/md/dm-ps-round-robin.c
@@ -220,8 +220,10 @@ static int __init dm_rr_init(void)
{
int r = dm_register_path_selector(&rr_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " RR_VERSION " loaded");
@@ -230,10 +232,7 @@ static int __init dm_rr_init(void)
static void __exit dm_rr_exit(void)
{
- int r = dm_unregister_path_selector(&rr_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&rr_ps);
}
module_init(dm_rr_init);
diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c
index 969d31c40272..0543fe7969c4 100644
--- a/drivers/md/dm-ps-service-time.c
+++ b/drivers/md/dm-ps-service-time.c
@@ -341,8 +341,10 @@ static int __init dm_st_init(void)
{
int r = dm_register_path_selector(&st_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " ST_VERSION " loaded");
@@ -351,10 +353,7 @@ static int __init dm_st_init(void)
static void __exit dm_st_exit(void)
{
- int r = dm_unregister_path_selector(&st_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&st_ps);
}
module_init(dm_st_init);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e8c0a8c6fb51..15c538ee9537 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -14,7 +14,6 @@
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"
-#include "dm-core.h"
#include <linux/device-mapper.h>
@@ -2532,6 +2531,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
struct md_rdev *rdev, *freshest;
struct mddev *mddev = &rs->md;
+ /* Respect resynchronization requested with "sync" argument. */
+ if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
+ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+
freshest = NULL;
rdev_for_each(rdev, mddev) {
if (test_bit(Journal, &rdev->flags))
@@ -3305,7 +3308,7 @@ size_check:
/* Disable/enable discard support on raid set. */
configure_discard_support(rs);
- rs->md.dm_gendisk = ti->table->md->disk;
+ rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table));
mddev_unlock(&rs->md);
return 0;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index a7dc04bd55e5..58902091bf79 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -316,7 +316,7 @@ static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
- pfn_t *pfn)
+ unsigned long *pfn)
{
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
@@ -458,6 +458,7 @@ static void stripe_io_hints(struct dm_target *ti,
struct stripe_c *sc = ti->private;
unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT;
+ limits->chunk_sectors = sc->chunk_size;
limits->io_min = chunk_size;
limits->io_opt = chunk_size * sc->stripes;
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 24a857ff6d0b..ad0a60a07b93 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -899,17 +899,17 @@ static bool dm_table_supports_dax(struct dm_table *t,
return true;
}
-static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static int device_is_not_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
{
struct block_device *bdev = dev->bdev;
struct request_queue *q = bdev_get_queue(bdev);
/* request-based cannot stack on partitions! */
if (bdev_is_partition(bdev))
- return false;
+ return true;
- return queue_is_mq(q);
+ return !queue_is_mq(q);
}
static int dm_table_determine_type(struct dm_table *t)
@@ -1005,7 +1005,7 @@ verify_rq_based:
/* Non-request-stackable devices can't be used for request-based dm */
if (!ti->type->iterate_devices ||
- !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
+ ti->type->iterate_devices(ti, device_is_not_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
@@ -2065,8 +2065,10 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
limits->discard_alignment = 0;
}
- if (!dm_table_supports_write_zeroes(t))
+ if (!dm_table_supports_write_zeroes(t)) {
limits->max_write_zeroes_sectors = 0;
+ limits->max_hw_wzeroes_unmap_sectors = 0;
+ }
if (!dm_table_supports_secure_erase(t))
limits->max_secure_erase_sectors = 0;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 652627aea11b..2af5a9514c05 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -255,7 +255,7 @@ static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits)
static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
- pfn_t *pfn)
+ unsigned long *pfn)
{
return -EIO;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 05cf4e3f2bbe..007bb93e5fca 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4111,8 +4111,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
- DM_TARGET_IMMUTABLE,
- .version = {1, 23, 0},
+ DM_TARGET_IMMUTABLE | DM_TARGET_PASSES_CRYPTO,
+ .version = {1, 24, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -4497,7 +4497,8 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 23, 0},
+ .features = DM_TARGET_PASSES_CRYPTO,
+ .version = {1, 24, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c
index ae11941c90a9..0613c82bbe8e 100644
--- a/drivers/md/dm-vdo/funnel-workqueue.c
+++ b/drivers/md/dm-vdo/funnel-workqueue.c
@@ -252,8 +252,7 @@ static void service_work_queue(struct simple_work_queue *queue)
* This speeds up some performance tests; that "other work" might include other VDO
* threads.
*/
- if (need_resched())
- cond_resched();
+ cond_resched();
}
run_finish_hook(queue);
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 631a887b487c..d382a390d39a 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -191,7 +191,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
u8 *want_digest, u8 *data)
{
if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io), true)))
+ verity_io_real_digest(v, io))))
return 0;
return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -392,7 +392,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io), true);
+ verity_io_real_digest(v, io));
if (unlikely(r < 0))
return r;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 81186bded1ce..66a00a8ccb39 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -19,7 +19,6 @@
#include "dm-audit.h"
#include <linux/module.h>
#include <linux/reboot.h>
-#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/jump_label.h>
#include <linux/security.h>
@@ -61,9 +60,6 @@ module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644)
static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
-/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
-static DEFINE_STATIC_KEY_FALSE(ahash_enabled);
-
struct dm_verity_prefetch_work {
struct work_struct work;
struct dm_verity *v;
@@ -118,100 +114,21 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
return block >> (level * v->hash_per_block_bits);
}
-static int verity_ahash_update(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len,
- struct crypto_wait *wait)
-{
- struct scatterlist sg;
-
- if (likely(!is_vmalloc_addr(data))) {
- sg_init_one(&sg, data, len);
- ahash_request_set_crypt(req, &sg, NULL, len);
- return crypto_wait_req(crypto_ahash_update(req), wait);
- }
-
- do {
- int r;
- size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data));
-
- flush_kernel_vmap_range((void *)data, this_step);
- sg_init_table(&sg, 1);
- sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data));
- ahash_request_set_crypt(req, &sg, NULL, this_step);
- r = crypto_wait_req(crypto_ahash_update(req), wait);
- if (unlikely(r))
- return r;
- data += this_step;
- len -= this_step;
- } while (len);
-
- return 0;
-}
-
-/*
- * Wrapper for crypto_ahash_init, which handles verity salting.
- */
-static int verity_ahash_init(struct dm_verity *v, struct ahash_request *req,
- struct crypto_wait *wait, bool may_sleep)
-{
- int r;
-
- ahash_request_set_tfm(req, v->ahash_tfm);
- ahash_request_set_callback(req,
- may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
- crypto_req_done, (void *)wait);
- crypto_init_wait(wait);
-
- r = crypto_wait_req(crypto_ahash_init(req), wait);
-
- if (unlikely(r < 0)) {
- if (r != -ENOMEM)
- DMERR("crypto_ahash_init failed: %d", r);
- return r;
- }
-
- if (likely(v->salt_size && (v->version >= 1)))
- r = verity_ahash_update(v, req, v->salt, v->salt_size, wait);
-
- return r;
-}
-
-static int verity_ahash_final(struct dm_verity *v, struct ahash_request *req,
- u8 *digest, struct crypto_wait *wait)
-{
- int r;
-
- if (unlikely(v->salt_size && (!v->version))) {
- r = verity_ahash_update(v, req, v->salt, v->salt_size, wait);
-
- if (r < 0) {
- DMERR("%s failed updating salt: %d", __func__, r);
- goto out;
- }
- }
-
- ahash_request_set_crypt(req, NULL, digest, 0);
- r = crypto_wait_req(crypto_ahash_final(req), wait);
-out:
- return r;
-}
-
int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
- const u8 *data, size_t len, u8 *digest, bool may_sleep)
+ const u8 *data, size_t len, u8 *digest)
{
+ struct shash_desc *desc = &io->hash_desc;
int r;
- if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) {
- struct ahash_request *req = verity_io_hash_req(v, io);
- struct crypto_wait wait;
-
- r = verity_ahash_init(v, req, &wait, may_sleep) ?:
- verity_ahash_update(v, req, data, len, &wait) ?:
- verity_ahash_final(v, req, digest, &wait);
+ desc->tfm = v->shash_tfm;
+ if (unlikely(v->initial_hashstate == NULL)) {
+ /* Version 0: salt at end */
+ r = crypto_shash_init(desc) ?:
+ crypto_shash_update(desc, data, len) ?:
+ crypto_shash_update(desc, v->salt, v->salt_size) ?:
+ crypto_shash_final(desc, digest);
} else {
- struct shash_desc *desc = verity_io_hash_req(v, io);
-
- desc->tfm = v->shash_tfm;
+ /* Version 1: salt at beginning */
r = crypto_shash_import(desc, v->initial_hashstate) ?:
crypto_shash_finup(desc, data, len, digest);
}
@@ -362,7 +279,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
}
r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io), !io->in_bh);
+ verity_io_real_digest(v, io));
if (unlikely(r < 0))
goto release_ret_r;
@@ -465,7 +382,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
goto free_ret;
r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io), true);
+ verity_io_real_digest(v, io));
if (unlikely(r))
goto free_ret;
@@ -581,7 +498,7 @@ static int verity_verify_io(struct dm_verity_io *io)
}
r = verity_hash(v, io, data, block_size,
- verity_io_real_digest(v, io), !io->in_bh);
+ verity_io_real_digest(v, io));
if (unlikely(r < 0)) {
kunmap_local(data);
return r;
@@ -1092,12 +1009,7 @@ static void verity_dtr(struct dm_target *ti)
kfree(v->zero_digest);
verity_free_sig(v);
- if (v->ahash_tfm) {
- static_branch_dec(&ahash_enabled);
- crypto_free_ahash(v->ahash_tfm);
- } else {
- crypto_free_shash(v->shash_tfm);
- }
+ crypto_free_shash(v->shash_tfm);
kfree(v->alg_name);
@@ -1157,7 +1069,8 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
if (!v->zero_digest)
return r;
- io = kmalloc(sizeof(*io) + v->hash_reqsize, GFP_KERNEL);
+ io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm),
+ GFP_KERNEL);
if (!io)
return r; /* verity_dtr will free zero_digest */
@@ -1168,7 +1081,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
goto out;
r = verity_hash(v, io, zero_data, 1 << v->data_dev_block_bits,
- v->zero_digest, true);
+ v->zero_digest);
out:
kfree(io);
@@ -1324,9 +1237,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
{
struct dm_target *ti = v->ti;
- struct crypto_ahash *ahash;
- struct crypto_shash *shash = NULL;
- const char *driver_name;
+ struct crypto_shash *shash;
v->alg_name = kstrdup(alg_name, GFP_KERNEL);
if (!v->alg_name) {
@@ -1334,50 +1245,14 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
return -ENOMEM;
}
- /*
- * Allocate the hash transformation object that this dm-verity instance
- * will use. The vast majority of dm-verity users use CPU-based
- * hashing, so when possible use the shash API to minimize the crypto
- * API overhead. If the ahash API resolves to a different driver
- * (likely an off-CPU hardware offload), use ahash instead. Also use
- * ahash if the obsolete dm-verity format with the appended salt is
- * being used, so that quirk only needs to be handled in one place.
- */
- ahash = crypto_alloc_ahash(alg_name, 0,
- v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0);
- if (IS_ERR(ahash)) {
+ shash = crypto_alloc_shash(alg_name, 0, 0);
+ if (IS_ERR(shash)) {
ti->error = "Cannot initialize hash function";
- return PTR_ERR(ahash);
- }
- driver_name = crypto_ahash_driver_name(ahash);
- if (v->version >= 1 /* salt prepended, not appended? */) {
- shash = crypto_alloc_shash(alg_name, 0, 0);
- if (!IS_ERR(shash) &&
- strcmp(crypto_shash_driver_name(shash), driver_name) != 0) {
- /*
- * ahash gave a different driver than shash, so probably
- * this is a case of real hardware offload. Use ahash.
- */
- crypto_free_shash(shash);
- shash = NULL;
- }
- }
- if (!IS_ERR_OR_NULL(shash)) {
- crypto_free_ahash(ahash);
- ahash = NULL;
- v->shash_tfm = shash;
- v->digest_size = crypto_shash_digestsize(shash);
- v->hash_reqsize = sizeof(struct shash_desc) +
- crypto_shash_descsize(shash);
- DMINFO("%s using shash \"%s\"", alg_name, driver_name);
- } else {
- v->ahash_tfm = ahash;
- static_branch_inc(&ahash_enabled);
- v->digest_size = crypto_ahash_digestsize(ahash);
- v->hash_reqsize = sizeof(struct ahash_request) +
- crypto_ahash_reqsize(ahash);
- DMINFO("%s using ahash \"%s\"", alg_name, driver_name);
+ return PTR_ERR(shash);
}
+ v->shash_tfm = shash;
+ v->digest_size = crypto_shash_digestsize(shash);
+ DMINFO("%s using \"%s\"", alg_name, crypto_shash_driver_name(shash));
if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
ti->error = "Digest size too big";
return -EINVAL;
@@ -1402,7 +1277,7 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
return -EINVAL;
}
}
- if (v->shash_tfm) {
+ if (v->version) { /* Version 1: salt at beginning */
SHASH_DESC_ON_STACK(desc, v->shash_tfm);
int r;
@@ -1681,7 +1556,8 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->per_io_data_size = sizeof(struct dm_verity_io) + v->hash_reqsize;
+ ti->per_io_data_size = sizeof(struct dm_verity_io) +
+ crypto_shash_descsize(v->shash_tfm);
r = verity_fec_ctr(v);
if (r)
@@ -1788,10 +1664,7 @@ static int verity_preresume(struct dm_target *ti)
bdev = dm_disk(dm_table_get_md(ti->table))->part0;
root_digest.digest = v->root_digest;
root_digest.digest_len = v->digest_size;
- if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm)
- root_digest.alg = crypto_ahash_alg_name(v->ahash_tfm);
- else
- root_digest.alg = crypto_shash_alg_name(v->shash_tfm);
+ root_digest.alg = crypto_shash_alg_name(v->shash_tfm);
r = security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH, &root_digest,
sizeof(root_digest));
@@ -1817,7 +1690,7 @@ static struct target_type verity_target = {
.name = "verity",
/* Note: the LSMs depend on the singleton and immutable features */
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
- .version = {1, 11, 0},
+ .version = {1, 12, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 8cbb57862ae1..6d141abd965c 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -39,11 +39,10 @@ struct dm_verity {
struct dm_target *ti;
struct dm_bufio_client *bufio;
char *alg_name;
- struct crypto_ahash *ahash_tfm; /* either this or shash_tfm is set */
- struct crypto_shash *shash_tfm; /* either this or ahash_tfm is set */
+ struct crypto_shash *shash_tfm;
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
- u8 *initial_hashstate; /* salted initial state, if shash_tfm is set */
+ u8 *initial_hashstate; /* salted initial state, if version >= 1 */
u8 *zero_digest; /* digest for a zero block */
#ifdef CONFIG_SECURITY
u8 *root_digest_sig; /* signature of the root digest */
@@ -61,7 +60,6 @@ struct dm_verity {
bool hash_failed:1; /* set if hash of any block failed */
bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */
unsigned int digest_size; /* digest size for the current hash algorithm */
- unsigned int hash_reqsize; /* the size of temporary space for crypto */
enum verity_mode mode; /* mode for handling verification errors */
enum verity_mode error_mode;/* mode for handling I/O errors */
unsigned int corrupted_errs;/* Number of errors for corrupted blocks */
@@ -100,19 +98,13 @@ struct dm_verity_io {
u8 want_digest[HASH_MAX_DIGESTSIZE];
/*
- * This struct is followed by a variable-sized hash request of size
- * v->hash_reqsize, either a struct ahash_request or a struct shash_desc
- * (depending on whether ahash_tfm or shash_tfm is being used). To
- * access it, use verity_io_hash_req().
+ * Temporary space for hashing. This is variable-length and must be at
+ * the end of the struct. struct shash_desc is just the fixed part;
+ * it's followed by a context of size crypto_shash_descsize(shash_tfm).
*/
+ struct shash_desc hash_desc;
};
-static inline void *verity_io_hash_req(struct dm_verity *v,
- struct dm_verity_io *io)
-{
- return io + 1;
-}
-
static inline u8 *verity_io_real_digest(struct dm_verity *v,
struct dm_verity_io *io)
{
@@ -126,7 +118,7 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
}
extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
- const u8 *data, size_t len, u8 *digest, bool may_sleep);
+ const u8 *data, size_t len, u8 *digest);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index a428e1cacf07..d8de4a3076a1 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -13,7 +13,6 @@
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/dax.h>
-#include <linux/pfn_t.h>
#include <linux/libnvdimm.h>
#include <linux/delay.h>
#include "dm-io-tracker.h"
@@ -256,7 +255,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
int r;
loff_t s;
long p, da;
- pfn_t pfn;
+ unsigned long pfn;
int id;
struct page **pages;
sector_t offset;
@@ -290,7 +289,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
r = da;
goto err2;
}
- if (!pfn_t_has_page(pfn)) {
+ if (!pfn_valid(pfn)) {
wc->memory_map = NULL;
r = -EOPNOTSUPP;
goto err2;
@@ -314,13 +313,13 @@ static int persistent_memory_claim(struct dm_writecache *wc)
r = daa ? daa : -EINVAL;
goto err3;
}
- if (!pfn_t_has_page(pfn)) {
+ if (!pfn_valid(pfn)) {
r = -EOPNOTSUPP;
goto err3;
}
while (daa-- && i < p) {
- pages[i++] = pfn_t_to_page(pfn);
- pfn.val++;
+ pages[i++] = pfn_to_page(pfn);
+ pfn++;
if (!(i & 15))
cond_resched();
}
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 3d31b82e0730..78e17dd4d01b 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -467,8 +467,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
bdev_offset_from_zone_start(disk->part0,
clone->bi_iter.bi_sector);
}
-
- return;
}
static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 5da3db06da10..9da329078ea4 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1062,7 +1062,7 @@ static int dmz_iterate_devices(struct dm_target *ti,
struct dmz_target *dmz = ti->private;
unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
sector_t capacity;
- int i, r;
+ int i, r = 0;
for (i = 0; i < dmz->nr_ddevs; i++) {
capacity = dmz->dev[i].capacity & ~(zone_nr_sectors - 1);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1726f0f828cc..a44e8c2dccee 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1024,10 +1024,8 @@ static void dm_wq_requeue_work(struct work_struct *work)
*
* 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
*/
-static void dm_io_complete(struct dm_io *io)
+static inline void dm_io_complete(struct dm_io *io)
{
- bool first_requeue;
-
/*
* Only dm_io that has been split needs two stage requeue, otherwise
* we may run into long bio clone chain during suspend and OOM could
@@ -1036,12 +1034,7 @@ static void dm_io_complete(struct dm_io *io)
* Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
* also aren't handled via the first stage requeue.
*/
- if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
- first_requeue = true;
- else
- first_requeue = false;
-
- __dm_io_complete(io, first_requeue);
+ __dm_io_complete(io, dm_io_flagged(io, DM_IO_WAS_SPLIT));
}
/*
@@ -1218,7 +1211,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
- pfn_t *pfn)
+ unsigned long *pfn)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
@@ -1293,8 +1286,9 @@ out:
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
- * operations, REQ_OP_ZONE_APPEND (zone append writes) and any bio serviced by
- * __send_duplicate_bios().
+ * operations, zone append writes (native with REQ_OP_ZONE_APPEND or emulated
+ * with write BIOs flagged with BIO_EMULATES_ZONE_APPEND) and any bio serviced
+ * by __send_duplicate_bios().
*
* dm_accept_partial_bio informs the dm that the target only wants to process
* additional n_sectors sectors of the bio and the rest of the data should be
@@ -1327,11 +1321,19 @@ void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors)
unsigned int bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
- BUG_ON(op_is_zone_mgmt(bio_op(bio)));
- BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
BUG_ON(bio_sectors > *tio->len_ptr);
BUG_ON(n_sectors > bio_sectors);
+ if (static_branch_unlikely(&zoned_enabled) &&
+ unlikely(bdev_is_zoned(bio->bi_bdev))) {
+ enum req_op op = bio_op(bio);
+
+ BUG_ON(op_is_zone_mgmt(op));
+ BUG_ON(op == REQ_OP_WRITE);
+ BUG_ON(op == REQ_OP_WRITE_ZEROES);
+ BUG_ON(op == REQ_OP_ZONE_APPEND);
+ }
+
*tio->len_ptr -= bio_sectors - n_sectors;
bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
@@ -1776,19 +1778,35 @@ static void init_clone_info(struct clone_info *ci, struct dm_io *io,
}
#ifdef CONFIG_BLK_DEV_ZONED
-static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
- struct bio *bio)
+static inline bool dm_zone_bio_needs_split(struct bio *bio)
{
/*
- * For mapped device that need zone append emulation, we must
- * split any large BIO that straddles zone boundaries.
+ * Special case the zone operations that cannot or should not be split.
+ */
+ switch (bio_op(bio)) {
+ case REQ_OP_ZONE_APPEND:
+ case REQ_OP_ZONE_FINISH:
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_RESET_ALL:
+ return false;
+ default:
+ break;
+ }
+
+ /*
+ * When mapped devices use the block layer zone write plugging, we must
+ * split any large BIO to the mapped device limits to not submit BIOs
+ * that span zone boundaries and to avoid potential deadlocks with
+ * queue freeze operations.
*/
- return dm_emulate_zone_append(md) && bio_straddles_zones(bio) &&
- !bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
+ return bio_needs_zone_write_plugging(bio) || bio_straddles_zones(bio);
}
+
static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
- return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
+ if (!bio_needs_zone_write_plugging(bio))
+ return false;
+ return blk_zone_plug_bio(bio, 0);
}
static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
@@ -1904,8 +1922,7 @@ static blk_status_t __send_zone_reset_all(struct clone_info *ci)
}
#else
-static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
- struct bio *bio)
+static inline bool dm_zone_bio_needs_split(struct bio *bio)
{
return false;
}
@@ -1932,9 +1949,7 @@ static void dm_split_and_process_bio(struct mapped_device *md,
is_abnormal = is_abnormal_io(bio);
if (static_branch_unlikely(&zoned_enabled)) {
- /* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
- need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
- (is_abnormal || dm_zone_bio_needs_split(md, bio));
+ need_split = is_abnormal || dm_zone_bio_needs_split(bio);
} else {
need_split = is_abnormal;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0f03b21e66e4..046fe85c76fe 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -636,9 +636,6 @@ static void __mddev_put(struct mddev *mddev)
mddev->ctime || mddev->hold_active)
return;
- /* Array is not configured at all, and not held active, so destroy it */
- set_bit(MD_DELETED, &mddev->flags);
-
/*
* Call queue_work inside the spinlock so that flush_workqueue() after
* mddev_find will succeed in waiting for the work to be done.
@@ -873,6 +870,16 @@ void mddev_unlock(struct mddev *mddev)
kobject_del(&rdev->kobj);
export_rdev(rdev, mddev);
}
+
+ /* Call del_gendisk after releasing reconfig_mutex to avoid a
+ * deadlock (e.g. calling del_gendisk under the lock while an
+ * access to sysfs files is waiting for the lock).
+ * MD_DELETED is only used for md raid and is set in do_md_stop.
+ * dm raid only uses md_stop to stop, so dm raid doesn't need
+ * to check MD_DELETED when taking the reconfig lock.
+ */
+ if (test_bit(MD_DELETED, &mddev->flags))
+ del_gendisk(mddev->gendisk);
}
EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -5774,19 +5781,30 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
struct mddev *mddev = container_of(kobj, struct mddev, kobj);
ssize_t rv;
+ struct kernfs_node *kn = NULL;
if (!entry->store)
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
+
+ if (entry->store == array_state_store && cmd_match(page, "clear"))
+ kn = sysfs_break_active_protection(kobj, attr);
+
spin_lock(&all_mddevs_lock);
if (!mddev_get(mddev)) {
spin_unlock(&all_mddevs_lock);
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
return -EBUSY;
}
spin_unlock(&all_mddevs_lock);
rv = entry->store(mddev, page, length);
mddev_put(mddev);
+
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
+
return rv;
}
@@ -5794,12 +5812,6 @@ static void md_kobj_release(struct kobject *ko)
{
struct mddev *mddev = container_of(ko, struct mddev, kobj);
- if (mddev->sysfs_state)
- sysfs_put(mddev->sysfs_state);
- if (mddev->sysfs_level)
- sysfs_put(mddev->sysfs_level);
-
- del_gendisk(mddev->gendisk);
put_disk(mddev->gendisk);
}
@@ -6413,15 +6425,10 @@ static void md_clean(struct mddev *mddev)
mddev->persistent = 0;
mddev->level = LEVEL_NONE;
mddev->clevel[0] = 0;
- /*
- * Don't clear MD_CLOSING, or mddev can be opened again.
- * 'hold_active != 0' means mddev is still in the creation
- * process and will be used later.
- */
- if (mddev->hold_active)
- mddev->flags = 0;
- else
- mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
+ /* if UNTIL_STOP is set, it's cleared here */
+ mddev->hold_active = 0;
+ /* Don't clear MD_CLOSING, or mddev can be opened again. */
+ mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
mddev->sb_flags = 0;
mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
@@ -6516,8 +6523,6 @@ static void __md_stop(struct mddev *mddev)
if (mddev->private)
pers->free(mddev, mddev->private);
mddev->private = NULL;
- if (pers->sync_request && mddev->to_remove == NULL)
- mddev->to_remove = &md_redundancy_group;
put_pers(pers);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -6646,10 +6651,8 @@ static int do_md_stop(struct mddev *mddev, int mode)
mddev->bitmap_info.offset = 0;
export_array(mddev);
-
md_clean(mddev);
- if (mddev->hold_active == UNTIL_STOP)
- mddev->hold_active = 0;
+ set_bit(MD_DELETED, &mddev->flags);
}
md_new_event();
sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -9456,17 +9459,11 @@ static bool md_spares_need_change(struct mddev *mddev)
return false;
}
-static int remove_and_add_spares(struct mddev *mddev,
- struct md_rdev *this)
+static int remove_spares(struct mddev *mddev, struct md_rdev *this)
{
struct md_rdev *rdev;
- int spares = 0;
int removed = 0;
- if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- /* Mustn't remove devices when resync thread is running */
- return 0;
-
rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
!mddev->pers->hot_remove_disk(mddev, rdev)) {
@@ -9480,6 +9477,21 @@ static int remove_and_add_spares(struct mddev *mddev,
if (removed && mddev->kobj.sd)
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
+ return removed;
+}
+
+static int remove_and_add_spares(struct mddev *mddev,
+ struct md_rdev *this)
+{
+ struct md_rdev *rdev;
+ int spares = 0;
+ int removed = 0;
+
+ if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ /* Mustn't remove devices when resync thread is running */
+ return 0;
+
+ removed = remove_spares(mddev, this);
if (this && removed)
goto no_add;
@@ -9522,6 +9534,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
/* Check if resync is in progress. */
if (mddev->recovery_cp < MaxSector) {
+ remove_spares(mddev, NULL);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
return true;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d45a9e6ead80..67b365621507 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -700,11 +700,26 @@ static inline bool reshape_interrupted(struct mddev *mddev)
static inline int __must_check mddev_lock(struct mddev *mddev)
{
- return mutex_lock_interruptible(&mddev->reconfig_mutex);
+ int ret;
+
+ ret = mutex_lock_interruptible(&mddev->reconfig_mutex);
+
+ /* MD_DELETED is set in do_md_stop with reconfig_mutex.
+ * So check it here.
+ */
+ if (!ret && test_bit(MD_DELETED, &mddev->flags)) {
+ ret = -ENODEV;
+ mutex_unlock(&mddev->reconfig_mutex);
+ }
+
+ return ret;
}
/* Sometimes we need to take the lock in a situation where
* failure due to interrupts is not acceptable.
+ * There is no need to check MD_DELETED here: the owner that
+ * holds the lock can't be stopped, and no path calls this
+ * function after do_md_stop.
*/
static inline void mddev_lock_nointr(struct mddev *mddev)
{
@@ -713,7 +728,14 @@ static inline void mddev_lock_nointr(struct mddev *mddev)
static inline int mddev_trylock(struct mddev *mddev)
{
- return mutex_trylock(&mddev->reconfig_mutex);
+ int ret;
+
+ ret = mutex_trylock(&mddev->reconfig_mutex);
+ if (!ret && test_bit(MD_DELETED, &mddev->flags)) {
+ ret = -ENODEV;
+ mutex_unlock(&mddev->reconfig_mutex);
+ }
+ return ret;
}
extern void mddev_unlock(struct mddev *mddev);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index d8f639f4ae12..cbe2a9054cb9 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -384,6 +384,7 @@ static int raid0_set_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
+ lim.chunk_sectors = mddev->chunk_sectors;
lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c9bd2005bfd0..95dc354a86a0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2446,15 +2446,12 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* that are active
*/
for (i = 0; i < conf->copies; i++) {
- int d;
-
tbio = r10_bio->devs[i].repl_bio;
if (!tbio || !tbio->bi_end_io)
continue;
if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
&& r10_bio->devs[i].bio != fbio)
bio_copy_data(tbio, fbio);
- d = r10_bio->devs[i].devnum;
atomic_inc(&r10_bio->remaining);
submit_bio_noacct(tbio);
}
@@ -4012,6 +4009,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
+ lim.chunk_sectors = mddev->chunk_sectors;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ca5b0e8ba707..7ec61ee7b218 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -9040,7 +9040,7 @@ static int __init raid5_init(void)
int ret;
raid5_wq = alloc_workqueue("raid5wq",
- WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
+ WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_SYSFS, 0);
if (!raid5_wq)
return -ENOMEM;