From c4d097d13052d1e6f29b8798264aed6135d99568 Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen
Date: Fri, 23 Jun 2017 17:27:01 +0200
Subject: dm raid: fix oops on upgrading to extended superblock format

When a RAID set was created on dm-raid version < 1.9.0 (old RAID
superblock format), all of the new 1.9.0 members of the superblock are
uninitialized (zero) -- including the device sectors member needed to
support shrinking.

All the other accesses to superblock fields new in 1.9.0 were reviewed
and verified to be properly guarded against invalid use.  The 'sectors'
member was the only one used when the superblock version is < 1.9.

Don't access the superblock's >= 1.9.0 'sectors' member unconditionally.
Also add respective comments.

Signed-off-by: Heinz Mauelshagen
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-raid.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7d893228c40f..b4b75dad816a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
 	/********************************************************************
 	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
 	 *
-	 * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
 	 */

 	__le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb->layout = cpu_to_le32(mddev->layout);
 	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);

+	/********************************************************************
+	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+	 *
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+	 */
 	sb->new_level = cpu_to_le32(mddev->new_level);
 	sb->new_layout = cpu_to_le32(mddev->new_layout);
 	sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
 		mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;

 	if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
-		/* Retrieve device size stored in superblock to be prepared for shrink */
-		rdev->sectors = le64_to_cpu(sb->sectors);
+		/*
+		 * Retrieve rdev size stored in superblock to be prepared for shrink.
+		 * Check extended superblock members are present otherwise the size
+		 * will not be set!
+		 */
+		if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+			rdev->sectors = le64_to_cpu(sb->sectors);
+
 		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
 		if (rdev->recovery_offset == MaxSector)
 			set_bit(In_sync, &rdev->flags);
--
cgit
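[Editor's note] The rule this patch enforces -- never read an extended on-disk
member unless the feature bit that introduced it is set -- generalizes to any
versioned superblock. Below is a minimal, self-contained userspace sketch of
that guard pattern. The struct layout, the FEATURE_FLAG_SUPPORTS_V2 bit and
the sb_get_sectors() helper are hypothetical stand-ins, not the dm-raid
definitions, and the sketch omits the le32_to_cpu()/le64_to_cpu() endianness
conversions the kernel code performs.

    /* guard_sketch.c: hypothetical illustration of guarding extended
     * superblock members behind a feature flag, modeled on the dm-raid fix.
     * Build: cc -o guard_sketch guard_sketch.c
     */
    #include <stdint.h>
    #include <stdio.h>

    #define FEATURE_FLAG_SUPPORTS_V2 0x1	/* hypothetical feature bit */

    struct sb_v1 {				/* hypothetical on-disk layout */
    	uint32_t magic;
    	uint32_t compat_features;	/* 0 on sets created by old code */
    	/* members below are only valid if FEATURE_FLAG_SUPPORTS_V2 is set */
    	uint64_t sectors;		/* device size, v2 extension */
    };

    /* Return the device size recorded in the superblock, or 'fallback'
     * when the superblock predates the extension (field is all zeroes). */
    static uint64_t sb_get_sectors(const struct sb_v1 *sb, uint64_t fallback)
    {
    	if (sb->compat_features & FEATURE_FLAG_SUPPORTS_V2)
    		return sb->sectors;
    	return fallback;	/* old format: field is uninitialized */
    }

    int main(void)
    {
    	struct sb_v1 old_sb = { 0 };	/* written by pre-v2 code */
    	struct sb_v1 new_sb = {
    		.compat_features = FEATURE_FLAG_SUPPORTS_V2,
    		.sectors = 2048,
    	};

    	printf("old sb -> %llu sectors (fallback used)\n",
    	       (unsigned long long)sb_get_sectors(&old_sb, 1024));
    	printf("new sb -> %llu sectors (read from sb)\n",
    	       (unsigned long long)sb_get_sectors(&new_sb, 1024));
    	return 0;
    }

The unguarded read in super_validate() was the dm-raid equivalent of skipping
the compat_features test above: on an old-format superblock it returned the
uninitialized zero and corrupted the rdev size.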
From 00a0ea33b495ee6149bf5a77ac5807ce87323abb Mon Sep 17 00:00:00 2001
From: Vallish Vaidyeshwara
Date: Fri, 23 Jun 2017 18:53:06 +0000
Subject: dm thin: do not queue freed thin mapping for next stage processing

process_prepared_discard_passdown_pt1() should clean up the
dm_thin_new_mapping in case of error.

dm_pool_inc_data_range() can fail trying to get a block reference:

metadata operation 'dm_pool_inc_data_range' failed: error = -61

When dm_pool_inc_data_range() fails, dm thin aborts the current metadata
transaction and marks the pool as PM_READ_ONLY. The memory for the thin
mapping is released as well. However, the current thin mapping will
already have been queued onto the next stage as part of
queue_passdown_pt2() or passdown_endio(). When this dangling thin
mapping is later processed and accessed by the next stage, device mapper
crashes (use after free).

Code flow without fix:
-> process_prepared_discard_passdown_pt1(m)
   -> dm_thin_remove_range()
   -> discard passdown
      --> passdown_endio(m) queues m onto next stage

   -> dm_pool_inc_data_range() fails, frees memory m
      but does not remove it from next stage queue

-> process_prepared_discard_passdown_pt2(m)
   -> processes freed memory m and crashes

One such stack:

Call Trace:
[] dm_cell_release_no_holder+0x2f/0x70 [dm_bio_prison]
[] cell_defer_no_holder+0x3c/0x80 [dm_thin_pool]
[] process_prepared_discard_passdown_pt2+0x4b/0x90 [dm_thin_pool]
[] process_prepared+0x81/0xa0 [dm_thin_pool]
[] do_worker+0xc5/0x820 [dm_thin_pool]
[] ? __schedule+0x244/0x680
[] ? pwq_activate_delayed_work+0x42/0xb0
[] process_one_work+0x153/0x3f0
[] worker_thread+0x12b/0x4b0
[] ? rescuer_thread+0x350/0x350
[] kthread+0xca/0xe0
[] ? kthread_park+0x60/0x60
[] ret_from_fork+0x25/0x30

The fix is to first take the block ref count for the discarded block and
only then do the passdown discard of that block. If taking the block ref
count fails, bail out: abort the current metadata transaction, mark the
pool as PM_READ_ONLY and free the current thin mapping memory (existing
error handling code) without queueing this thin mapping onto the next
stage of processing. If taking the block ref count succeeds, do the
passdown discard of the block; the discard callback passdown_endio()
will then queue this thin mapping onto the next stage of processing.

Code flow with fix:
-> process_prepared_discard_passdown_pt1(m)
   -> dm_thin_remove_range()
   -> dm_pool_inc_data_range()
      --> if fails, free memory m and bail out
   -> discard passdown
      --> passdown_endio(m) queues m onto next stage

Cc: stable # v4.9+
Reviewed-by: Eduardo Valentin
Reviewed-by: Cristian Gafton
Reviewed-by: Anchal Agarwal
Signed-off-by: Vallish Vaidyeshwara
Reviewed-by: Joe Thornber
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-thin.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50daed08..28808e5ec0fd 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1094,6 +1094,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 		return;
 	}

+	/*
+	 * Increment the unmapped blocks. This prevents a race between the
+	 * passdown io and reallocation of freed blocks.
+	 */
+	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+	if (r) {
+		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+		bio_io_error(m->bio);
+		cell_defer_no_holder(tc, m->cell);
+		mempool_free(m, pool->mapping_pool);
+		return;
+	}
+
 	discard_parent = bio_alloc(GFP_NOIO, 1);
 	if (!discard_parent) {
 		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1127,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 			end_discard(&op, r);
 		}
 	}
-
-	/*
-	 * Increment the unmapped blocks. This prevents a race between the
-	 * passdown io and reallocation of freed blocks.
-	 */
-	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
-	if (r) {
-		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
-		bio_io_error(m->bio);
-		cell_defer_no_holder(tc, m->cell);
-		mempool_free(m, pool->mapping_pool);
-		return;
-	}
 }

 static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
--
cgit
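[Editor's note] The ordering rule behind this fix -- run every step that can
fail and free the object *before* handing it to an asynchronous path whose
completion callback requeues it -- can be shown in miniature. The sketch below
uses hypothetical names (struct mapping, inc_data_range(), start_async_op(),
queue_for_pt2()) and a synchronous callback in place of the real bio/endio
machinery; it illustrates the object-lifetime rule, not the dm-thin
implementation.

    /* ordering_sketch.c: hypothetical illustration of the dm-thin fix.
     * Once an object has been queued for the next processing stage, stage 1
     * no longer owns it and must not free it, so any fallible step that
     * frees the object on error has to run before the async submission.
     * Build: cc -o ordering_sketch ordering_sketch.c
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct mapping { int block; };

    /* May fail, like dm_pool_inc_data_range(); negative block forces it. */
    static int inc_data_range(struct mapping *m)
    {
    	return m->block < 0 ? -1 : 0;
    }

    /* Stands in for passdown_endio() queueing m onto stage pt2. */
    static void queue_for_pt2(struct mapping *m)
    {
    	printf("block %d queued for stage pt2\n", m->block);
    }

    /* Stands in for the passdown discard; its completion callback requeues
     * m, so after this call stage 1 must not touch or free m. */
    static void start_async_op(struct mapping *m)
    {
    	queue_for_pt2(m);
    }

    static void passdown_pt1(struct mapping *m)
    {
    	/* FIX: take the block reference first, while pt1 still owns m. */
    	if (inc_data_range(m)) {
    		fprintf(stderr, "ref failed; freeing m without queueing\n");
    		free(m);	/* safe: m was never handed to pt2 */
    		return;
    	}
    	/* Only now hand m to the async path, transferring ownership. */
    	start_async_op(m);
    }

    int main(void)
    {
    	struct mapping *ok = malloc(sizeof(*ok));
    	struct mapping *bad = malloc(sizeof(*bad));

    	ok->block = 7;
    	bad->block = -1;	/* force the failure path */

    	passdown_pt1(ok);	/* queued for pt2 */
    	passdown_pt1(bad);	/* freed, never queued: no use after free */
    	free(ok);		/* in the real driver, pt2 frees it */
    	return 0;
    }

The buggy ordering did the opposite: start_async_op() (the passdown discard)
ran first, publishing m to the next stage, and the failing inc_data_range()
then freed m without any way to retract it from that queue.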