From f1445032173d4a49eb8b4a0808db499966897d9a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 18 Sep 2023 17:33:29 +0200 Subject: dm: shortcut the calls to linear_map and stripe_map Shortcut the calls to linear_map and stripe_map, so that they don't suffer the overhead of retpolines used for indirect calls. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-linear.c | 2 +- drivers/md/dm-stripe.c | 2 +- drivers/md/dm.c | 11 +++++++++-- drivers/md/dm.h | 2 ++ 4 files changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index f4448d520ee9..2d3e186ca87e 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -85,7 +85,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) return lc->start + dm_target_offset(ti, bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio) +int linear_map(struct dm_target *ti, struct bio *bio) { struct linear_c *lc = ti->private; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e2854a3cbd28..c11619d82a7e 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -268,7 +268,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, return DM_MAPIO_SUBMITTED; } -static int stripe_map(struct dm_target *ti, struct bio *bio) +int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 64a1f306c96c..ab7dd8ab51d1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1423,10 +1423,17 @@ static void __map_bio(struct bio *clone) */ if (unlikely(dm_emulate_zone_append(md))) r = dm_zone_map_bio(tio); + else + goto do_map; + } else { +do_map: + if (likely(ti->type->map == linear_map)) + r = linear_map(ti, clone); + else if (ti->type->map == stripe_map) + r = stripe_map(ti, clone); else r = ti->type->map(ti, clone); - } else - r = ti->type->map(ti, 
clone); + } switch (r) { case DM_MAPIO_SUBMITTED: diff --git a/drivers/md/dm.h b/drivers/md/dm.h index f682295af91f..7f1acbf6bd9e 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -188,9 +188,11 @@ void dm_kobject_release(struct kobject *kobj); /* * Targets for linear and striped mappings */ +int linear_map(struct dm_target *ti, struct bio *bio); int dm_linear_init(void); void dm_linear_exit(void); +int stripe_map(struct dm_target *ti, struct bio *bio); int dm_stripe_init(void); void dm_stripe_exit(void); -- cgit From ac4149ba7efd6bc327c1e15e812091984f3a16b2 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 25 Sep 2023 06:13:12 +0000 Subject: dm cache metadata: replace deprecated strncpy with strscpy `strncpy` is deprecated for use on NUL-terminated destination strings [1] and as such we should prefer more robust and less ambiguous string interfaces. It seems `cmd->policy_name` is intended to be NUL-terminated based on a now changed line of code from Commit (c6b4fcbad044e6ff "dm: add cache target"): | if (strcmp(cmd->policy_name, policy_name)) { // ... However, now a length-bounded strncmp is used: | if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name))) ... which means NUL-terminated may not strictly be required. However, I believe the intent of the code is clear and we should maintain NUL-termination of policy_names. Moreover, __begin_transaction_flags() zero-allocates `cmd` before calling read_superblock_fields(): | cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); Also, `disk_super->policy_name` is zero-initialized | memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); ... therefore any NUL-padding is redundant. Considering the above, a suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on the destination buffer without unnecessarily NUL-padding. 
Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Reviewed-by: Kees Cook Signed-off-by: Justin Stitt Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-metadata.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index acffed750e3e..5a18b80d3666 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -597,7 +597,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); - strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); + strscpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]); cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]); cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]); @@ -707,7 +707,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); - strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); + strscpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); @@ -1726,7 +1726,7 
@@ static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) return -EINVAL; - strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); + strscpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); hint_size = dm_cache_policy_get_hint_size(policy); -- cgit From e9d7bd2c8664aa43866c7985d9050a052516c07d Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 25 Sep 2023 06:35:54 +0000 Subject: dm crypt: replace open-coded kmemdup_nul kzalloc() followed by strncpy() on an expected NUL-terminated string is just kmemdup_nul(). Let's simplify this code (while also dropping a deprecated strncpy() call [1]). Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://github.com/KSPP/linux/issues/90 Reviewed-by: Kees Cook Signed-off-by: Justin Stitt Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f2662c21a6df..8a03b3590733 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2858,10 +2858,9 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api) if (!start || !end || ++start > end) return -EINVAL; - mac_alg = kzalloc(end - start + 1, GFP_KERNEL); + mac_alg = kmemdup_nul(start, end - start, GFP_KERNEL); if (!mac_alg) return -ENOMEM; - strncpy(mac_alg, start, end - start); mac = crypto_alloc_ahash(mac_alg, 0, CRYPTO_ALG_ALLOCATES_MEMORY); kfree(mac_alg); -- cgit From 0ffb645ea821fbad4215b6a5681b823639c24660 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 25 Sep 2023 06:54:51 +0000 Subject: dm ioctl: replace deprecated strncpy with strscpy_pad `strncpy` is deprecated for use on NUL-terminated destination strings [1] and as such we should prefer more robust and 
less ambiguous string interfaces. We expect `spec->target_type` to be NUL-terminated based on its use with a format string after `dm_table_add_target()` is called | r = dm_table_add_target(table, spec->target_type, | (sector_t) spec->sector_start, | (sector_t) spec->length, | target_params); ... wherein `spec->target_type` is passed as parameter `type` and later printed with DMERR: | DMERR("%s: %s: unknown target type", dm_device_name(t->md), type); It appears that `spec` is not zero-allocated and thus NUL-padding may be required in this ioctl context. Considering the above, a suitable replacement is `strscpy_pad` due to the fact that it guarantees NUL-termination whilst maintaining the NUL-padding behavior that strncpy provides. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://github.com/KSPP/linux/issues/90 Reviewed-by: Kees Cook Signed-off-by: Justin Stitt Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 21ebb6c39394..e65058e0ed06 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1295,8 +1295,8 @@ static void retrieve_status(struct dm_table *table, spec->status = 0; spec->sector_start = ti->begin; spec->length = ti->len; - strncpy(spec->target_type, ti->type->name, - sizeof(spec->target_type) - 1); + strscpy_pad(spec->target_type, ti->type->name, + sizeof(spec->target_type)); outptr += sizeof(struct dm_target_spec); remaining = len - (outptr - outbuf); -- cgit From 18ac52332959aaf4bee54ee1b760959beeb13ae2 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 25 Sep 2023 07:06:03 +0000 Subject: dm log userspace: replace deprecated strncpy with strscpy `strncpy` is deprecated for use on NUL-terminated destination strings [1] and as such we should prefer more robust and less ambiguous string interfaces. 
`lc` is already zero-allocated: | lc = kzalloc(sizeof(*lc), GFP_KERNEL); ... as such, any future NUL-padding is superfluous. A suitable replacement is `strscpy` [2] due to the fact that it guarantees NUL-termination on the destination buffer without unnecessarily NUL-padding. Let's also go with the more idiomatic `dest, src, sizeof(dest)` pattern for destination buffers that the compiler can calculate the size for. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] Link: https://github.com/KSPP/linux/issues/90 Reviewed-by: Kees Cook Signed-off-by: Justin Stitt Signed-off-by: Mike Snitzer --- drivers/md/dm-log-userspace-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 5aace6ee6d47..7e4f27e86150 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -224,7 +224,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, lc->usr_argc = argc; - strncpy(lc->uuid, argv[0], DM_UUID_LEN); + strscpy(lc->uuid, argv[0], sizeof(lc->uuid)); argc--; argv++; spin_lock_init(&lc->flush_lock); -- cgit From 34dbaa88cab121437d711bb7b9673c16eed4f922 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Oct 2023 13:34:54 +0200 Subject: dm: make __send_duplicate_bios return unsigned int All the callers cast the value returned by __send_duplicate_bios to unsigned int type, so we can return unsigned int as well. 
Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ab7dd8ab51d1..1113a8da3c47 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1506,8 +1506,8 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } } -static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned int num_bios, unsigned int *len) +static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + unsigned int num_bios, unsigned int *len) { struct bio_list blist = BIO_EMPTY_LIST; struct bio *clone; -- cgit From 4a2fe2960891f1ccd7805d0973284fd44c2f12b4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Oct 2023 11:29:36 -0400 Subject: dm: enhance alloc_multiple_bios() to be more versatile alloc_multiple_bios() has the useful ability to try allocating bios with GFP_NOWAIT but will fallback to using GFP_NOIO. The callers service both empty flush bios and abnormal bios (e.g. discard). alloc_multiple_bios() enhancements offered in this commit: - don't require table_devices_lock if num_bios = 1 - allow caller to pass GFP_NOWAIT to do usual GFP_NOWAIT with GFP_NOIO fallback - allow caller to pass GFP_NOIO to _only_ allocate using GFP_NOIO Flush bios with data may be issued to DM with REQ_NOWAIT, as such it makes sense to attempt servicing them with GFP_NOWAIT allocations. But abnormal IO should never be issued using REQ_NOWAIT (if that changes in the future that's fine, but no sense supporting it now). While at it, rename __send_changing_extent_only() to __send_abnormal_io(). [Thanks to both Ming and Mikulas for help with translating known possible IO scenarios to requirements.] 
Suggested-by: Ming Lei Suggested-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 68 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1113a8da3c47..b5b1b220dfd6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1478,15 +1478,15 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len) static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, struct dm_target *ti, unsigned int num_bios, - unsigned *len) + unsigned *len, gfp_t gfp_flag) { struct bio *bio; - int try; + int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1; - for (try = 0; try < 2; try++) { + for (; try < 2; try++) { int bio_nr; - if (try) + if (try && num_bios > 1) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { bio = alloc_tio(ci, ti, bio_nr, len, @@ -1496,7 +1496,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, bio_list_add(blist, bio); } - if (try) + if (try && num_bios > 1) mutex_unlock(&ci->io->md->table_devices_lock); if (bio_nr == num_bios) return; @@ -1507,33 +1507,30 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned int num_bios, unsigned int *len) + unsigned int num_bios, unsigned int *len, + gfp_t gfp_flag) { struct bio_list blist = BIO_EMPTY_LIST; struct bio *clone; unsigned int ret = 0; - switch (num_bios) { - case 0: - break; - case 1: - if (len) - setup_split_accounting(ci, *len); - clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - __map_bio(clone); - ret = 1; - break; - default: - if (len) - setup_split_accounting(ci, *len); - /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ - alloc_multiple_bios(&blist, ci, ti, num_bios, len); - while ((clone = bio_list_pop(&blist))) 
{ + if (WARN_ON_ONCE(num_bios == 0)) /* num_bios = 0 is a bug in caller */ + return 0; + + /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ + if (len) + setup_split_accounting(ci, *len); + + /* + * Using alloc_multiple_bios(), even if num_bios is 1, to consistently + * support allocating using GFP_NOWAIT with GFP_NOIO fallback. + */ + alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag); + while ((clone = bio_list_pop(&blist))) { + if (num_bios > 1) dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); - __map_bio(clone); - ret += 1; - } - break; + __map_bio(clone); + ret += 1; } return ret; @@ -1560,8 +1557,12 @@ static void __send_empty_flush(struct clone_info *ci) unsigned int bios; struct dm_target *ti = dm_table_get_target(t, i); + if (unlikely(ti->num_flush_bios == 0)) + continue; + atomic_add(ti->num_flush_bios, &ci->io->io_count); - bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); + bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, + NULL, GFP_NOWAIT); atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count); } @@ -1574,10 +1575,9 @@ static void __send_empty_flush(struct clone_info *ci) bio_uninit(ci->bio); } -static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - unsigned int num_bios, - unsigned int max_granularity, - unsigned int max_sectors) +static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti, + unsigned int num_bios, unsigned int max_granularity, + unsigned int max_sectors) { unsigned int len, bios; @@ -1585,7 +1585,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target __max_io_len(ti, ci->sector, max_granularity, max_sectors)); atomic_add(num_bios, &ci->io->io_count); - bios = __send_duplicate_bios(ci, ti, num_bios, &len); + bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO); /* * alloc_io() takes one extra reference for submission, so the * reference won't reach 0 without the following (+1) 
subtraction @@ -1654,8 +1654,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, if (unlikely(!num_bios)) return BLK_STS_NOTSUPP; - __send_changing_extent_only(ci, ti, num_bios, - max_granularity, max_sectors); + __send_abnormal_io(ci, ti, num_bios, max_granularity, max_sectors); + return BLK_STS_OK; } -- cgit From 6f25dd1c57b8ba92d899eae4eda16573a2f78ffc Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 25 Oct 2023 19:29:03 -0400 Subject: dm: respect REQ_NOWAIT flag in normal bios issued to DM Update DM core's normal IO submission to allocate required memory using GFP_NOWAIT if REQ_NOWAIT is set. Tested with simple test provided in commit a9ce385344f916 ("dm: don't attempt to queue IO under RCU protection") that was enhanced to check error codes. Also tested using fio's pvsync2 with nowait=1. But testing with induced GFP_NOWAIT allocation failures wasn't performed (yet). Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b5b1b220dfd6..0dd8ddf40006 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -570,13 +570,15 @@ static void dm_end_io_acct(struct dm_io *io) dm_io_acct(io, true); } -static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) +static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio, gfp_t gfp_mask) { struct dm_io *io; struct dm_target_io *tio; struct bio *clone; - clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs); + clone = bio_alloc_clone(NULL, bio, gfp_mask, &md->mempools->io_bs); + if (unlikely(!clone)) + return NULL; tio = clone_to_tio(clone); tio->flags = 0; dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO); @@ -1714,10 +1716,6 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) if (unlikely(!ti)) return BLK_STS_IOERR; - if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) && - 
unlikely(!dm_target_supports_nowait(ti->type))) - return BLK_STS_NOTSUPP; - if (unlikely(ci->is_abnormal_io)) return __process_abnormal_io(ci, ti); @@ -1729,7 +1727,17 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); setup_split_accounting(ci, len); - clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); + + if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) { + if (unlikely(!dm_target_supports_nowait(ti->type))) + return BLK_STS_NOTSUPP; + + clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT); + if (unlikely(!clone)) + return BLK_STS_AGAIN; + } else { + clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); + } __map_bio(clone); ci->sector += len; @@ -1738,11 +1746,11 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) return BLK_STS_OK; } -static void init_clone_info(struct clone_info *ci, struct mapped_device *md, +static void init_clone_info(struct clone_info *ci, struct dm_io *io, struct dm_table *map, struct bio *bio, bool is_abnormal) { ci->map = map; - ci->io = alloc_io(md, bio); + ci->io = io; ci->bio = bio; ci->is_abnormal_io = is_abnormal; ci->submit_as_polled = false; @@ -1777,8 +1785,18 @@ static void dm_split_and_process_bio(struct mapped_device *md, return; } - init_clone_info(&ci, md, map, bio, is_abnormal); - io = ci.io; + /* Only support nowait for normal IO */ + if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) { + io = alloc_io(md, bio, GFP_NOWAIT); + if (unlikely(!io)) { + /* Unable to do anything without dm_io. 
*/ + bio_wouldblock_error(bio); + return; + } + } else { + io = alloc_io(md, bio, GFP_NOIO); + } + init_clone_info(&ci, io, map, bio, is_abnormal); if (bio->bi_opf & REQ_PREFLUSH) { __send_empty_flush(&ci); -- cgit From 70bbeb29fab09d6ea6cfe64109db60a97d84d739 Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Fri, 20 Oct 2023 12:46:05 +0100 Subject: dm delay: for short delays, use kthread instead of timers and wq DM delay's current design of using timers and wq to realize the delays is insufficient for delays below ~50ms. This commit enhances the design to use a kthread to flush the expired delays, trading some CPU time (in some cases) for better delay accuracy and delays closer to what the user requested for smaller delays. The new design is chosen as long as all the delays are below 50ms. Since bios can't be completed in interrupt context using a kthread is probably the most reasonable way to approach this. Testing with echo "0 2097152 zero" | dmsetup create dm-zeros for i in $(seq 0 20); do echo "0 2097152 delay /dev/mapper/dm-zeros 0 $i" | dmsetup create dm-delay-${i}ms; done Some performance numbers for comparison, on beaglebone black (single core) CONFIG_HZ_1000=y: fio --name=1msread --rw=randread --bs=4k --runtime=60 --time_based \ --filename=/dev/mapper/dm-delay-1ms Theoretical maximum: 1000 IOPS Previous: 250 IOPS Kthread: 500 IOPS fio --name=10msread --rw=randread --bs=4k --runtime=60 --time_based \ --filename=/dev/mapper/dm-delay-10ms Theoretical maximum: 100 IOPS Previous: 45 IOPS Kthread: 50 IOPS fio --name=1mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \ --time_based --filename=/dev/mapper/dm-delay-1ms Theoretical maximum: 1000 IOPS Previous: 498 IOPS Kthread: 1000 IOPS fio --name=10mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \ --time_based --filename=/dev/mapper/dm-delay-10ms Theoretical maximum: 100 IOPS Previous: 90 IOPS Kthread: 100 IOPS (This one is just to prove the new design isn't impacting throughput, not really about 
delays): fio --name=10mswriteasync --rw=randwrite --direct=1 --bs=4k \ --runtime=60 --time_based --filename=/dev/mapper/dm-delay-10ms \ --numjobs=32 --iodepth=64 --ioengine=libaio --group_reporting Previous: 13.3k IOPS Kthread: 13.3k IOPS Signed-off-by: Christian Loehle [Harshit: kthread_create error handling fix in delay_ctr] Signed-off-by: Harshit Mogalapalli Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 103 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 15 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 7433525e5985..efd510984e25 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -31,6 +32,7 @@ struct delay_c { struct workqueue_struct *kdelayd_wq; struct work_struct flush_expired_bios; struct list_head delayed_bios; + struct task_struct *worker; atomic_t may_delay; struct delay_class read; @@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires) mutex_unlock(&dc->timer_lock); } +static inline bool delay_is_fast(struct delay_c *dc) +{ + return !!dc->worker; +} + +static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all) +{ + struct dm_delay_info *delayed, *next; + + mutex_lock(&delayed_bios_lock); + list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { + if (flush_all || time_after_eq(jiffies, delayed->expires)) { + struct bio *bio = dm_bio_from_per_bio_data(delayed, + sizeof(struct dm_delay_info)); + list_del(&delayed->list); + dm_submit_bio_remap(bio, NULL); + delayed->class->ops--; + } + } + mutex_unlock(&delayed_bios_lock); +} + +static int flush_worker_fn(void *data) +{ + struct delay_c *dc = data; + + while (1) { + flush_delayed_bios_fast(dc, false); + if (unlikely(list_empty(&dc->delayed_bios))) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } else + cond_resched(); + } + + return 0; +} + static void 
flush_bios(struct bio *bio) { struct bio *n; @@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio) } } -static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) +static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all) { struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; @@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work) struct delay_c *dc; dc = container_of(work, struct delay_c, flush_expired_bios); - flush_bios(flush_delayed_bios(dc, 0)); + if (delay_is_fast(dc)) + flush_delayed_bios_fast(dc, false); + else + flush_bios(flush_delayed_bios(dc, false)); } static void delay_dtr(struct dm_target *ti) @@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti) dm_put_device(ti, dc->write.dev); if (dc->flush.dev) dm_put_device(ti, dc->flush.dev); + if (dc->worker) + kthread_stop(dc->worker); - mutex_destroy(&dc->timer_lock); + if (!delay_is_fast(dc)) + mutex_destroy(&dc->timer_lock); kfree(dc); } @@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct delay_c *dc; int ret; + unsigned int max_delay; if (argc != 3 && argc != 6 && argc != 9) { ti->error = "Requires exactly 3, 6 or 9 arguments"; @@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = dc; - timer_setup(&dc->delay_timer, handle_delayed_timer, 0); - INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); INIT_LIST_HEAD(&dc->delayed_bios); - mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); dc->argc = argc; ret = delay_class_ctr(ti, &dc->read, argv); if (ret) goto bad; + max_delay = dc->read.delay; if (argc == 3) { ret = delay_class_ctr(ti, &dc->write, argv); @@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ret = delay_class_ctr(ti, &dc->flush, argv); if (ret) goto bad; + max_delay = max(max_delay, dc->write.delay); + max_delay = max(max_delay, dc->flush.delay); 
goto out; } @@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ret = delay_class_ctr(ti, &dc->flush, argv + 3); if (ret) goto bad; + max_delay = max(max_delay, dc->flush.delay); goto out; } ret = delay_class_ctr(ti, &dc->flush, argv + 6); if (ret) goto bad; + max_delay = max(max_delay, dc->flush.delay); out: - dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); - if (!dc->kdelayd_wq) { - ret = -EINVAL; - DMERR("Couldn't start kdelayd"); - goto bad; + if (max_delay < 50) { + /* + * In case of small requested delays, use kthread instead of + * timers and workqueue to achieve better latency. + */ + dc->worker = kthread_create(&flush_worker_fn, dc, + "dm-delay-flush-worker"); + if (IS_ERR(dc->worker)) { + ret = PTR_ERR(dc->worker); + goto bad; + } + } else { + timer_setup(&dc->delay_timer, handle_delayed_timer, 0); + INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); + mutex_init(&dc->timer_lock); + dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); + if (!dc->kdelayd_wq) { + ret = -EINVAL; + DMERR("Couldn't start kdelayd"); + goto bad; + } } ti->num_flush_bios = 1; @@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) list_add_tail(&delayed->list, &dc->delayed_bios); mutex_unlock(&delayed_bios_lock); - queue_timeout(dc, expires); + if (delay_is_fast(dc)) + wake_up_process(dc->worker); + else + queue_timeout(dc, expires); return DM_MAPIO_SUBMITTED; } @@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti) struct delay_c *dc = ti->private; atomic_set(&dc->may_delay, 0); - del_timer_sync(&dc->delay_timer); - flush_bios(flush_delayed_bios(dc, 1)); + + if (delay_is_fast(dc)) + flush_delayed_bios_fast(dc, true); + else { + del_timer_sync(&dc->delay_timer); + flush_bios(flush_delayed_bios(dc, true)); + } } static void delay_resume(struct dm_target *ti) @@ -356,7 +429,7 @@ out: static struct target_type delay_target = { .name = "delay", - 
.version = {1, 3, 0}, + .version = {1, 4, 0}, .features = DM_TARGET_PASSES_INTEGRITY, .module = THIS_MODULE, .ctr = delay_ctr, -- cgit From a951104333bd25bb6e5d0f5bee9cbf155b66fac1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 26 Oct 2023 14:12:05 +0900 Subject: dm error: Add support for zoned block devices dm-error is used in several test cases in the xfstests test suite to check the handling of IO errors in file systems. However, with several file systems getting native support for zoned block devices (e.g. btrfs and f2fs), dm-error's lack of zoned block device support creates problems as the file system attempts executing zone commands (e.g. a zone append operation) against a dm-error non-zoned block device, which causes various issues in the block layer (e.g. WARN_ON triggers). This commit adds support for zoned block devices to dm-error, allowing a DM device table containing an error target to be exposed as a zoned block device (if all targets have a compatible zoned model support and mapping). This is done as follows: 1) Allow passing 2 arguments to an error target, similar to dm-linear: a backing device and a start sector. These arguments are optional and dm-error retains its characteristics if the arguments are not specified. 2) Implement the iterate_devices method so that dm-core can normally check the zone support and restrictions (e.g. zone alignment of the targets). When the backing device arguments are not specified, the iterate_devices method never calls the fn() argument. When no backing device is specified, as before, we assume that the DM device is not zoned. When the backing device arguments are specified, the zoned model of the DM device will depend on the backing device type: - If the backing device is zoned and its model and mapping is compatible with other targets of the device, the resulting device will be zoned, with the dm-error mapped portion always returning errors (similar to the default non-zoned case). 
- If the backing device is not zoned, then the DM device will not be either. This zone support for dm-error requires the definition of a functional report_zones operation so that dm_revalidate_zones() can operate correctly and resources for emulating zone append operations initialized. This is necessary for cases where dm-error is used to partially map a device and have an overall correct handling of zone append. This means that dm-error does not fail report zones operations. Two changes that are not obvious are included to avoid issues: 1) dm_table_supports_zoned_model() is changed to directly check if the backing device of a wildcard target (= dm-error target) is zoned. Otherwise, we wouldn't be able to catch the invalid setup of dm-error without a backing device (non zoned case) being combined with zoned targets. 2) dm_table_supports_dax() is modified to return false if the wildcard target is found. Otherwise, when dm-error is set without a backing device, we end up with a NULL pointer dereference in set_dax_synchronous (dax_dev is NULL). This is consistent with the current behavior because dm_table_supports_dax() always returned false for targets that do not define the iterate_devices method. 
Signed-off-by: Damien Le Moal Tested-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 23 ++++++++++- drivers/md/dm-target.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 37b48f63ae6a..198d38b53322 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -844,7 +844,8 @@ static bool dm_table_supports_dax(struct dm_table *t, if (!ti->type->direct_access) return false; - if (!ti->type->iterate_devices || + if (dm_target_is_wildcard(ti->type) || + !ti->type->iterate_devices || ti->type->iterate_devices(ti, iterate_fn, NULL)) return false; } @@ -1587,6 +1588,14 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev, return blk_queue_zoned_model(q) != *zoned_model; } +static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return blk_queue_zoned_model(q) != BLK_ZONED_NONE; +} + /* * Check the device zoned model based on the target feature flag. If the target * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are @@ -1600,6 +1609,18 @@ static bool dm_table_supports_zoned_model(struct dm_table *t, for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); + /* + * For the wildcard target (dm-error), if we do not have a + * backing device, we must always return false. If we have a + * backing device, the result must depend on checking zoned + * model, like for any other target. So for this, check directly + * if the target backing device is zoned as we get "false" when + * dm-error was set without a backing device. 
+ */ + if (dm_target_is_wildcard(ti->type) && + !ti->type->iterate_devices(ti, device_is_zoned_model, NULL)) + return false; + if (dm_target_supports_zoned_hm(ti->type)) { if (!ti->type->iterate_devices || ti->type->iterate_devices(ti, device_not_zoned_model, diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 27e2992ff249..0c4efb0bef8a 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -116,8 +116,62 @@ EXPORT_SYMBOL(dm_unregister_target); * io-err: always fails an io, useful for bringing * up LVs that have holes in them. */ +struct io_err_c { + struct dm_dev *dev; + sector_t start; +}; + +static int io_err_get_args(struct dm_target *tt, unsigned int argc, char **args) +{ + unsigned long long start; + struct io_err_c *ioec; + char dummy; + int ret; + + ioec = kmalloc(sizeof(*ioec), GFP_KERNEL); + if (!ioec) { + tt->error = "Cannot allocate io_err context"; + return -ENOMEM; + } + + ret = -EINVAL; + if (sscanf(args[1], "%llu%c", &start, &dummy) != 1 || + start != (sector_t)start) { + tt->error = "Invalid device sector"; + goto bad; + } + ioec->start = start; + + ret = dm_get_device(tt, args[0], dm_table_get_mode(tt->table), &ioec->dev); + if (ret) { + tt->error = "Device lookup failed"; + goto bad; + } + + tt->private = ioec; + + return 0; + +bad: + kfree(ioec); + + return ret; +} + static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) { + /* + * If we have arguments, assume it is the path to the backing + * block device and its mapping start sector (same as dm-linear). + * In this case, get the device so that we can get its limits. 
+ */ + if (argc == 2) { + int ret = io_err_get_args(tt, argc, args); + + if (ret) + return ret; + } + /* * Return error for discards instead of -EOPNOTSUPP */ @@ -129,7 +183,12 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) static void io_err_dtr(struct dm_target *tt) { - /* empty */ + struct io_err_c *ioec = tt->private; + + if (ioec) { + dm_put_device(tt, ioec->dev); + kfree(ioec); + } } static int io_err_map(struct dm_target *tt, struct bio *bio) @@ -149,6 +208,45 @@ static void io_err_release_clone_rq(struct request *clone, { } +#ifdef CONFIG_BLK_DEV_ZONED +static sector_t io_err_map_sector(struct dm_target *ti, sector_t bi_sector) +{ + struct io_err_c *ioec = ti->private; + + return ioec->start + dm_target_offset(ti, bi_sector); +} + +static int io_err_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) +{ + struct io_err_c *ioec = ti->private; + + /* + * This should never be called when we do not have a backing device + * as that means the target is not a zoned one. 
+ */ + if (WARN_ON_ONCE(!ioec)) + return -EIO; + + return dm_report_zones(ioec->dev->bdev, ioec->start, + io_err_map_sector(ti, args->next_sector), + args, nr_zones); +} +#else +#define io_err_report_zones NULL +#endif + +static int io_err_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct io_err_c *ioec = ti->private; + + if (!ioec) + return 0; + + return fn(ti, ioec->dev, ioec->start, ti->len, data); +} + static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits) { limits->max_discard_sectors = UINT_MAX; @@ -165,15 +263,17 @@ static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, static struct target_type error_target = { .name = "error", - .version = {1, 6, 0}, - .features = DM_TARGET_WILDCARD, + .version = {1, 7, 0}, + .features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, .clone_and_map_rq = io_err_clone_and_map_rq, .release_clone_rq = io_err_release_clone_rq, + .iterate_devices = io_err_iterate_devices, .io_hints = io_err_io_hints, .direct_access = io_err_dax_direct_access, + .report_zones = io_err_report_zones, }; int __init dm_target_init(void) -- cgit From 6d0ee3b68092ef556703d7827ead3d1b7d275399 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 28 Oct 2023 21:59:23 -0700 Subject: dm crypt: use crypto_shash_digest() in crypt_iv_tcw_whitening() Simplify crypt_iv_tcw_whitening() by using crypto_shash_digest() instead of an init+update+final sequence. This should also improve performance. 
Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8a03b3590733..4b5c54d0c51a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -652,13 +652,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, /* calculate crc32 for every 32bit part and xor it */ desc->tfm = tcw->crc32_tfm; for (i = 0; i < 4; i++) { - r = crypto_shash_init(desc); - if (r) - goto out; - r = crypto_shash_update(desc, &buf[i * 4], 4); - if (r) - goto out; - r = crypto_shash_final(desc, &buf[i * 4]); + r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]); if (r) goto out; } -- cgit From 070bb43ab01e891db1b742d4ddd7291c7f8d7022 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 28 Oct 2023 21:59:44 -0700 Subject: dm integrity: use crypto_shash_digest() in sb_mac() Simplify sb_mac() by using crypto_shash_digest() instead of an init+update+final sequence. This should also improve performance. 
Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 97a8d5fc9ebb..e85c688fd91e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -493,42 +493,32 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) { SHASH_DESC_ON_STACK(desc, ic->journal_mac); int r; - unsigned int size = crypto_shash_digestsize(ic->journal_mac); + unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac); + __u8 *sb = (__u8 *)ic->sb; + __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size; - if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) { + if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) { dm_integrity_io_error(ic, "digest is too long", -EINVAL); return -EINVAL; } desc->tfm = ic->journal_mac; - r = crypto_shash_init(desc); - if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_init", r); - return r; - } - - r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size); - if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_update", r); - return r; - } - if (likely(wr)) { - r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size); + r = crypto_shash_digest(desc, sb, mac - sb, mac); if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_final", r); + dm_integrity_io_error(ic, "crypto_shash_digest", r); return r; } } else { - __u8 result[HASH_MAX_DIGESTSIZE]; + __u8 actual_mac[HASH_MAX_DIGESTSIZE]; - r = crypto_shash_final(desc, result); + r = crypto_shash_digest(desc, sb, mac - sb, actual_mac); if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_final", r); + dm_integrity_io_error(ic, "crypto_shash_digest", r); return r; } - if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) { + if (memcmp(mac, actual_mac, mac_size)) { 
dm_integrity_io_error(ic, "superblock mac", -EILSEQ); dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); return -EILSEQ; -- cgit From 9793c269da6cd339757de6ba5b2c8681b54c99af Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 31 Oct 2023 19:12:54 +0100 Subject: dm crypt: account large pages in cc->n_allocated_pages The commit 5054e778fcd9c ("dm crypt: allocate compound pages if possible") changed dm-crypt to use compound pages to improve performance. Unfortunately, there was an oversight: the allocation of compound pages was not accounted at all. Normal pages are accounted in a percpu counter cc->n_allocated_pages and dm-crypt is limited to allocate at most 2% of memory. Because compound pages were not accounted at all, dm-crypt could allocate memory over the 2% limit. Fix this by adding the accounting of compound pages, so that memory consumption of dm-crypt is properly limited. Signed-off-by: Mikulas Patocka Fixes: 5054e778fcd9c ("dm crypt: allocate compound pages if possible") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4b5c54d0c51a..37dcc113baa1 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1692,11 +1692,17 @@ retry: order = min(order, remaining_order); while (order > 0) { + if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) + + (1 << order) > dm_crypt_pages_per_client)) + goto decrease_order; pages = alloc_pages(gfp_mask | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP, order); - if (likely(pages != NULL)) + if (likely(pages != NULL)) { + percpu_counter_add(&cc->n_allocated_pages, 1 << order); goto have_pages; + } +decrease_order: order--; } @@ -1734,10 +1740,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) if (clone->bi_vcnt > 0) { /* 
bio_for_each_folio_all crashes with an empty bio */ bio_for_each_folio_all(fi, clone) { - if (folio_test_large(fi.folio)) + if (folio_test_large(fi.folio)) { + percpu_counter_sub(&cc->n_allocated_pages, + 1 << folio_order(fi.folio)); folio_put(fi.folio); - else + } else { mempool_free(&fi.folio->page, &cc->page_pool); + } } } } -- cgit