From 3bd548e5b819b8c0f2c9085de775c5c7bff9052f Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Fri, 16 Sep 2022 16:33:05 -0700 Subject: drivers/md/md-bitmap: check the return value of md_bitmap_get_counter() Check the return value of md_bitmap_get_counter() in case it returns NULL pointer, which will result in a null pointer dereference. v2: update the check to include other dereference Signed-off-by: Li Zhong Signed-off-by: Song Liu --- drivers/md/md-bitmap.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index bf6dffadbe6f..63ece30114e5 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2195,20 +2195,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (set) { bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); - if (*bmc_new == 0) { - /* need to set on-disk bits too. */ - sector_t end = block + new_blocks; - sector_t start = block >> chunkshift; - start <<= chunkshift; - while (start < end) { - md_bitmap_file_set_bit(bitmap, block); - start += 1 << chunkshift; + if (bmc_new) { + if (*bmc_new == 0) { + /* need to set on-disk bits too. */ + sector_t end = block + new_blocks; + sector_t start = block >> chunkshift; + + start <<= chunkshift; + while (start < end) { + md_bitmap_file_set_bit(bitmap, block); + start += 1 << chunkshift; + } + *bmc_new = 2; + md_bitmap_count_page(&bitmap->counts, block, 1); + md_bitmap_set_pending(&bitmap->counts, block); } - *bmc_new = 2; - md_bitmap_count_page(&bitmap->counts, block, 1); - md_bitmap_set_pending(&bitmap->counts, block); + *bmc_new |= NEEDED_MASK; } - *bmc_new |= NEEDED_MASK; if (new_blocks < old_blocks) old_blocks = new_blocks; } -- cgit From 9487a0f6855c1a28e4e39d41545cd19ed417c015 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 20 Oct 2022 17:51:04 +0200 Subject: raid5-cache: use try_cmpxchg in r5l_wake_reclaim Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in r5l_wake_reclaim. 86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails. There is no need to re-read the value in the loop. Note that the value from *ptr should be read using READ_ONCE to prevent the compiler from merging, refetching or reordering the read. No functional change intended. Cc: Song Liu Signed-off-by: Uros Bizjak Signed-off-by: Song Liu --- drivers/md/raid5-cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 832d8566e165..a63023aae21e 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1565,11 +1565,12 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space) if (!log) return; + + target = READ_ONCE(log->reclaim_target); do { - target = log->reclaim_target; if (new < target) return; - } while (cmpxchg(&log->reclaim_target, target, new) != target); + } while (!try_cmpxchg(&log->reclaim_target, &target, new)); md_wakeup_thread(log->reclaim_thread); } -- cgit From 2f6d261e15e8d09a22192aa943c5ed8a792f04ef Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 20 Sep 2022 10:39:37 +0800 Subject: md: factor out __md_set_array_info() Factor out __md_set_array_info(). No functional change. Signed-off-by: Ye Bin Signed-off-by: Song Liu --- drivers/md/md.c | 65 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 30 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index a467b492d4ad..b0c3e7d3adbc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7464,6 +7464,40 @@ static inline bool md_ioctl_valid(unsigned int cmd) } } +static int __md_set_array_info(struct mddev *mddev, void __user *argp) +{ + mdu_array_info_t info; + int err; + + if (!argp) + memset(&info, 0, sizeof(info)); + else if (copy_from_user(&info, argp, sizeof(info))) + return -EFAULT; + + if (mddev->pers) { + err = update_array_info(mddev, &info); + if (err) + pr_warn("md: couldn't update array info. %d\n", err); + return err; + } + + if (!list_empty(&mddev->disks)) { + pr_warn("md: array %s already has disks!\n", mdname(mddev)); + return -EBUSY; + } + + if (mddev->raid_disks) { + pr_warn("md: array %s already initialised!\n", mdname(mddev)); + return -EBUSY; + } + + err = md_set_array_info(mddev, &info); + if (err) + pr_warn("md: couldn't set array info. %d\n", err); + + return err; +} + static int md_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -7569,36 +7603,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } if (cmd == SET_ARRAY_INFO) { - mdu_array_info_t info; - if (!arg) - memset(&info, 0, sizeof(info)); - else if (copy_from_user(&info, argp, sizeof(info))) { - err = -EFAULT; - goto unlock; - } - if (mddev->pers) { - err = update_array_info(mddev, &info); - if (err) { - pr_warn("md: couldn't update array info. %d\n", err); - goto unlock; - } - goto unlock; - } - if (!list_empty(&mddev->disks)) { - pr_warn("md: array %s already has disks!\n", mdname(mddev)); - err = -EBUSY; - goto unlock; - } - if (mddev->raid_disks) { - pr_warn("md: array %s already initialised!\n", mdname(mddev)); - err = -EBUSY; - goto unlock; - } - err = md_set_array_info(mddev, &info); - if (err) { - pr_warn("md: couldn't set array info. %d\n", err); - goto unlock; - } + err = __md_set_array_info(mddev, argp); goto unlock; } -- cgit From f97a5528b21eb175d90dce2df9960c8d08e1be82 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 20 Sep 2022 10:39:38 +0800 Subject: md: introduce md_ro_state Introduce md_ro_state for mddev->ro, so it is easy to understand. Signed-off-by: Ye Bin Signed-off-by: Song Liu --- drivers/md/md.c | 152 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 82 insertions(+), 70 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index b0c3e7d3adbc..6f3b2c1cb6cd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); static void mddev_detach(struct mddev *mddev); +enum md_ro_state { + MD_RDWR, + MD_RDONLY, + MD_AUTO_READ, + MD_MAX_STATE +}; + +static bool md_is_rdwr(struct mddev *mddev) +{ + return (mddev->ro == MD_RDWR); +} + /* * Default number of read corrections we'll attempt on an rdev * before ejecting it from the array. We divide the read error @@ -444,7 +456,7 @@ static void md_submit_bio(struct bio *bio) bio = bio_split_to_limits(bio); - if (mddev->ro == 1 && unlikely(rw == WRITE)) { + if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) bio->bi_status = BLK_STS_IOERR; bio_endio(bio); @@ -2639,7 +2651,7 @@ void md_update_sb(struct mddev *mddev, int force_change) int any_badblocks_changed = 0; int ret = -1; - if (mddev->ro) { + if (!md_is_rdwr(mddev)) { if (force_change) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); return; @@ -3901,7 +3913,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) goto out_unlock; } rv = -EROFS; - if (mddev->ro) + if (!md_is_rdwr(mddev)) goto out_unlock; /* request to change the personality. Need to ensure: @@ -4107,7 +4119,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) { if (mddev->pers->check_reshape == NULL) err = -EBUSY; - else if (mddev->ro) + else if (!md_is_rdwr(mddev)) err = -EROFS; else { mddev->new_layout = n; @@ -4216,7 +4228,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) { if (mddev->pers->check_reshape == NULL) err = -EBUSY; - else if (mddev->ro) + else if (!md_is_rdwr(mddev)) err = -EROFS; else { mddev->new_chunk_sectors = n >> 9; @@ -4339,13 +4351,13 @@ array_state_show(struct mddev *mddev, char *page) if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) { switch(mddev->ro) { - case 1: + case MD_RDONLY: st = readonly; break; - case 2: + case MD_AUTO_READ: st = read_auto; break; - case 0: + case MD_RDWR: spin_lock(&mddev->lock); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) st = write_pending; @@ -4381,7 +4393,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) int err = 0; enum array_state st = match_word(buf, array_states); - if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) { + if (mddev->pers && (st == active || st == clean) && + mddev->ro != MD_RDONLY) { /* don't take reconfig_mutex when toggling between * clean and active */ @@ -4425,23 +4438,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) err = md_set_readonly(mddev, NULL); else { - mddev->ro = 1; + mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); err = do_md_run(mddev); } break; case read_auto: if (mddev->pers) { - if (mddev->ro == 0) + if (md_is_rdwr(mddev)) err = md_set_readonly(mddev, NULL); - else if (mddev->ro == 1) + else if (mddev->ro == MD_RDONLY) err = restart_array(mddev); if (err == 0) { - mddev->ro = 2; + mddev->ro = MD_AUTO_READ; set_disk_ro(mddev->gendisk, 0); } } else { - mddev->ro = 2; + mddev->ro = MD_AUTO_READ; err = do_md_run(mddev); } break; @@ -4466,7 +4479,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) wake_up(&mddev->sb_wait); err = 0; } else { - mddev->ro = 0; + mddev->ro = MD_RDWR; set_disk_ro(mddev->gendisk, 0); err = do_md_run(mddev); } @@ -4765,7 +4778,7 @@ action_show(struct mddev *mddev, char *page) if (test_bit(MD_RECOVERY_FROZEN, &recovery)) type = "frozen"; else if (test_bit(MD_RECOVERY_RUNNING, &recovery) || - (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) { + (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) { if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) type = "reshape"; else if (test_bit(MD_RECOVERY_SYNC, &recovery)) { @@ -4851,11 +4864,11 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } - if (mddev->ro == 2) { + if (mddev->ro == MD_AUTO_READ) { /* A write to sync_action is enough to justify * canceling read-auto mode */ - mddev->ro = 0; + mddev->ro = MD_RDWR; md_wakeup_thread(mddev->sync_thread); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -5083,8 +5096,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len) goto out_unlock; err = -EBUSY; - if (max < mddev->resync_max && - mddev->ro == 0 && + if (max < mddev->resync_max && md_is_rdwr(mddev) && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) goto out_unlock; @@ -5813,8 +5825,8 @@ int md_run(struct mddev *mddev) continue; sync_blockdev(rdev->bdev); invalidate_bdev(rdev->bdev); - if (mddev->ro != 1 && rdev_read_only(rdev)) { - mddev->ro = 1; + if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) { + mddev->ro = MD_RDONLY; if (mddev->gendisk) set_disk_ro(mddev->gendisk, 1); } @@ -5917,8 +5929,8 @@ int md_run(struct mddev *mddev) mddev->ok_start_degraded = start_dirty_degraded; - if (start_readonly && mddev->ro == 0) - mddev->ro = 2; /* read-only, but switch on first write */ + if (start_readonly && md_is_rdwr(mddev)) + mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */ err = pers->run(mddev); if (err) @@ -5996,8 +6008,8 @@ int md_run(struct mddev *mddev) mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed"); mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded"); - } else if (mddev->ro == 2) /* auto-readonly not meaningful */ - mddev->ro = 0; + } else if (mddev->ro == MD_AUTO_READ) + mddev->ro = MD_RDWR; atomic_set(&mddev->max_corr_read_errors, MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); @@ -6015,7 +6027,7 @@ int md_run(struct mddev *mddev) if (rdev->raid_disk >= 0) sysfs_link_rdev(mddev, rdev); /* failure here is OK */ - if (mddev->degraded && !mddev->ro) + if (mddev->degraded && md_is_rdwr(mddev)) /* This ensures that recovering status is reported immediately * via sysfs - until a lack of spares is confirmed. */ @@ -6105,7 +6117,7 @@ static int restart_array(struct mddev *mddev) return -ENXIO; if (!mddev->pers) return -EINVAL; - if (!mddev->ro) + if (md_is_rdwr(mddev)) return -EBUSY; rcu_read_lock(); @@ -6124,7 +6136,7 @@ static int restart_array(struct mddev *mddev) return -EROFS; mddev->safemode = 0; - mddev->ro = 0; + mddev->ro = MD_RDWR; set_disk_ro(disk, 0); pr_debug("md: %s switched to read-write mode.\n", mdname(mddev)); /* Kick recovery or resync if necessary */ @@ -6151,7 +6163,7 @@ static void md_clean(struct mddev *mddev) mddev->clevel[0] = 0; mddev->flags = 0; mddev->sb_flags = 0; - mddev->ro = 0; + mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; @@ -6203,7 +6215,7 @@ static void __md_stop_writes(struct mddev *mddev) } md_bitmap_flush(mddev); - if (mddev->ro == 0 && + if (md_is_rdwr(mddev) && ((!mddev->in_sync && !mddev_is_clustered(mddev)) || mddev->sb_flags)) { /* mark array as shutdown cleanly */ @@ -6312,9 +6324,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) __md_stop_writes(mddev); err = -ENXIO; - if (mddev->ro==1) + if (mddev->ro == MD_RDONLY) goto out; - mddev->ro = 1; + mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -6371,7 +6383,7 @@ static int do_md_stop(struct mddev *mddev, int mode, return -EBUSY; } if (mddev->pers) { - if (mddev->ro) + if (!md_is_rdwr(mddev)) set_disk_ro(disk, 0); __md_stop_writes(mddev); @@ -6388,8 +6400,8 @@ static int do_md_stop(struct mddev *mddev, int mode, mutex_unlock(&mddev->open_mutex); mddev->changed = 1; - if (mddev->ro) - mddev->ro = 0; + if (!md_is_rdwr(mddev)) + mddev->ro = MD_RDWR; } else mutex_unlock(&mddev->open_mutex); /* @@ -7204,7 +7216,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || mddev->sync_thread) return -EBUSY; - if (mddev->ro) + if (!md_is_rdwr(mddev)) return -EROFS; rdev_for_each(rdev, mddev) { @@ -7234,7 +7246,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) /* change the number of raid disks */ if (mddev->pers->check_reshape == NULL) return -EINVAL; - if (mddev->ro) + if (!md_is_rdwr(mddev)) return -EROFS; if (raid_disks <= 0 || (mddev->max_disks && raid_disks >= mddev->max_disks)) @@ -7663,26 +7675,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, * The remaining ioctls are changing the state of the * superblock, so we do not allow them on read-only arrays. */ - if (mddev->ro && mddev->pers) { - if (mddev->ro == 2) { - mddev->ro = 0; - sysfs_notify_dirent_safe(mddev->sysfs_state); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - /* mddev_unlock will wake thread */ - /* If a device failed while we were read-only, we - * need to make sure the metadata is updated now. - */ - if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) { - mddev_unlock(mddev); - wait_event(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) && - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); - mddev_lock_nointr(mddev); - } - } else { + if (!md_is_rdwr(mddev) && mddev->pers) { + if (mddev->ro != MD_AUTO_READ) { err = -EROFS; goto unlock; } + mddev->ro = MD_RDWR; + sysfs_notify_dirent_safe(mddev->sysfs_state); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + /* mddev_unlock will wake thread */ + /* If a device failed while we were read-only, we + * need to make sure the metadata is updated now. + */ + if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) { + mddev_unlock(mddev); + wait_event(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) && + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); + mddev_lock_nointr(mddev); + } } switch (cmd) { @@ -7768,11 +7779,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro) * Transitioning to read-auto need only happen for arrays that call * md_write_start and which are not ready for writes yet. */ - if (!ro && mddev->ro == 1 && mddev->pers) { + if (!ro && mddev->ro == MD_RDONLY && mddev->pers) { err = restart_array(mddev); if (err) goto out_unlock; - mddev->ro = 2; + mddev->ro = MD_AUTO_READ; } out_unlock: @@ -8246,9 +8257,9 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s : %sactive", mdname(mddev), mddev->pers ? "" : "in"); if (mddev->pers) { - if (mddev->ro==1) + if (mddev->ro == MD_RDONLY) seq_printf(seq, " (read-only)"); - if (mddev->ro==2) + if (mddev->ro == MD_AUTO_READ) seq_printf(seq, " (auto-read-only)"); seq_printf(seq, " %s", mddev->pers->name); } @@ -8507,10 +8518,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) if (bio_data_dir(bi) != WRITE) return true; - BUG_ON(mddev->ro == 1); - if (mddev->ro == 2) { + BUG_ON(mddev->ro == MD_RDONLY); + if (mddev->ro == MD_AUTO_READ) { /* need to switch to read/write */ - mddev->ro = 0; + mddev->ro = MD_RDWR; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); @@ -8561,7 +8572,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi) { if (bio_data_dir(bi) != WRITE) return; - WARN_ON_ONCE(mddev->in_sync || mddev->ro); + WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev)); percpu_ref_get(&mddev->writes_pending); } EXPORT_SYMBOL(md_write_inc); @@ -8666,7 +8677,7 @@ void md_allow_write(struct mddev *mddev) { if (!mddev->pers) return; - if (mddev->ro) + if (!md_is_rdwr(mddev)) return; if (!mddev->pers->sync_request) return; @@ -8714,7 +8725,7 @@ void md_do_sync(struct md_thread *thread) if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) || test_bit(MD_RECOVERY_WAIT, &mddev->recovery)) return; - if (mddev->ro) {/* never try to sync a read-only array */ + if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); return; } @@ -9183,9 +9194,9 @@ static int remove_and_add_spares(struct mddev *mddev, if (test_bit(Faulty, &rdev->flags)) continue; if (!test_bit(Journal, &rdev->flags)) { - if (mddev->ro && - ! (rdev->saved_raid_disk >= 0 && - !test_bit(Bitmap_sync, &rdev->flags))) + if (!md_is_rdwr(mddev) && + !(rdev->saved_raid_disk >= 0 && + !test_bit(Bitmap_sync, &rdev->flags))) continue; rdev->recovery_offset = 0; @@ -9283,7 +9294,8 @@ void md_check_recovery(struct mddev *mddev) flush_signals(current); } - if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + if (!md_is_rdwr(mddev) && + !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( (mddev->sb_flags & ~ (1<external && mddev->safemode == 1) mddev->safemode = 0; - if (mddev->ro) { + if (!md_is_rdwr(mddev)) { struct md_rdev *rdev; if (!mddev->external && mddev->in_sync) /* 'Blocked' flag not needed as failed devices -- cgit From 4555211190798b6b6fa2c37667d175bf67945c78 Mon Sep 17 00:00:00 2001 From: Florian-Ewald Mueller Date: Tue, 25 Oct 2022 09:37:05 +0200 Subject: md/bitmap: Fix bitmap chunk size overflow issues - limit bitmap chunk size internal u64 variable to values not overflowing the u32 bitmap superblock structure variable stored on persistent media - assign bitmap chunk size internal u64 variable from unsigned values to avoid possible sign extension artifacts when assigning from a s32 value The bug has been there since at least kernel 4.0. Steps to reproduce it: 1: mdadm -C /dev/mdx -l 1 --bitmap=internal --bitmap-chunk=256M -e 1.2 -n2 /dev/rnbd1 /dev/rnbd2 2 resize member device rnbd1 and rnbd2 to 8 TB 3 mdadm --grow /dev/mdx --size=max The bitmap_chunksize will overflow without patch. Cc: stable@vger.kernel.org Signed-off-by: Florian-Ewald Mueller Signed-off-by: Jack Wang Signed-off-by: Song Liu --- drivers/md/md-bitmap.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 63ece30114e5..e7cc6ba1b657 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap) sb = kmap_atomic(bitmap->storage.sb_page); pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); - pr_debug(" version: %d\n", le32_to_cpu(sb->version)); + pr_debug(" version: %u\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", le32_to_cpu(*(__le32 *)(sb->uuid+0)), le32_to_cpu(*(__le32 *)(sb->uuid+4)), @@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap) pr_debug("events cleared: %llu\n", (unsigned long long) le64_to_cpu(sb->events_cleared)); pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); - pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); - pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); + pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); + pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); pr_debug(" sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); - pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); + pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); kunmap_atomic(sb); } @@ -2105,7 +2105,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bytes = DIV_ROUND_UP(chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); - } while (bytes > (space << 9)); + } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < + (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); } else chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; @@ -2150,7 +2151,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = pages; bitmap->counts.chunkshift = chunkshift; bitmap->counts.chunks = chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + + bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + BITMAP_BLOCK_SHIFT); blocks = min(old_counts.chunks << old_counts.chunkshift, @@ -2176,8 +2177,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = old_counts.pages; bitmap->counts.chunkshift = old_counts.chunkshift; bitmap->counts.chunks = old_counts.chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + - BITMAP_BLOCK_SHIFT); + bitmap->mddev->bitmap_info.chunksize = + 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); blocks = old_counts.chunks << old_counts.chunkshift; pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); break; @@ -2537,6 +2538,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len) if (csize < 512 || !is_power_of_2(csize)) return -EINVAL; + if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * + sizeof(((bitmap_super_t *)0)->chunksize)))) + return -EOVERFLOW; mddev->bitmap_info.chunksize = csize; return len; } -- cgit From 8e1a2279ca2b0485cc379a153d02a9793f74a48f Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 2 Nov 2022 10:07:30 +0800 Subject: md/raid0, raid10: Don't set discard sectors for request queue It should use disk_stack_limits to get a proper max_discard_sectors rather than setting a value by stack drivers. And there is a bug. If all member disks are rotational devices, raid0/raid10 set max_discard_sectors. So the member devices are not ssd/nvme, but raid0/raid10 export the wrong value. It reports warning messages in function __blkdev_issue_discard when mkfs.xfs like this: [ 4616.022599] ------------[ cut here ]------------ [ 4616.027779] WARNING: CPU: 4 PID: 99634 at block/blk-lib.c:50 __blkdev_issue_discard+0x16a/0x1a0 [ 4616.140663] RIP: 0010:__blkdev_issue_discard+0x16a/0x1a0 [ 4616.146601] Code: 24 4c 89 20 31 c0 e9 fe fe ff ff c1 e8 09 8d 48 ff 4c 89 f0 4c 09 e8 48 85 c1 0f 84 55 ff ff ff b8 ea ff ff ff e9 df fe ff ff <0f> 0b 48 8d 74 24 08 e8 ea d6 00 00 48 c7 c6 20 1e 89 ab 48 c7 c7 [ 4616.167567] RSP: 0018:ffffaab88cbffca8 EFLAGS: 00010246 [ 4616.173406] RAX: ffff9ba1f9e44678 RBX: 0000000000000000 RCX: ffff9ba1c9792080 [ 4616.181376] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9ba1c9792080 [ 4616.189345] RBP: 0000000000000cc0 R08: ffffaab88cbffd10 R09: 0000000000000000 [ 4616.197317] R10: 0000000000000012 R11: 0000000000000000 R12: 0000000000000000 [ 4616.205288] R13: 0000000000400000 R14: 0000000000000cc0 R15: ffff9ba1c9792080 [ 4616.213259] FS: 00007f9a5534e980(0000) GS:ffff9ba1b7c80000(0000) knlGS:0000000000000000 [ 4616.222298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4616.228719] CR2: 000055a390a4c518 CR3: 0000000123e40006 CR4: 00000000001706e0 [ 4616.236689] Call Trace: [ 4616.239428] blkdev_issue_discard+0x52/0xb0 [ 4616.244108] blkdev_common_ioctl+0x43c/0xa00 [ 4616.248883] blkdev_ioctl+0x116/0x280 [ 4616.252977] __x64_sys_ioctl+0x8a/0xc0 [ 4616.257163] do_syscall_64+0x5c/0x90 [ 4616.261164] ? handle_mm_fault+0xc5/0x2a0 [ 4616.265652] ? do_user_addr_fault+0x1d8/0x690 [ 4616.270527] ? do_syscall_64+0x69/0x90 [ 4616.274717] ? exc_page_fault+0x62/0x150 [ 4616.279097] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 4616.284748] RIP: 0033:0x7f9a55398c6b Signed-off-by: Xiao Ni Reported-by: Yi Zhang Reviewed-by: Ming Lei Signed-off-by: Song Liu --- drivers/md/raid0.c | 1 - drivers/md/raid10.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 857c49399c28..b536befd8898 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -398,7 +398,6 @@ static int raid0_run(struct mddev *mddev) blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_discard_sectors(mddev->queue, UINT_MAX); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); blk_queue_io_opt(mddev->queue, diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3aa8b6e11d58..9a6503f5cb98 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4145,8 +4145,6 @@ static int raid10_run(struct mddev *mddev) conf->thread = NULL; if (mddev->queue) { - blk_queue_max_discard_sectors(mddev->queue, - UINT_MAX); blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); raid10_set_io_opt(conf); -- cgit From 341097ee53573e06ab9fc675d96a052385b851fa Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 4 Nov 2022 09:53:38 -0400 Subject: md: fix a crash in mempool_free There's a crash in mempool_free when running the lvm test shell/lvchange-rebuild-raid.sh. The reason for the crash is this: * super_written calls atomic_dec_and_test(&mddev->pending_writes) and wake_up(&mddev->sb_wait). Then it calls rdev_dec_pending(rdev, mddev) and bio_put(bio). * so, the process that waited on sb_wait and that is woken up is racing with bio_put(bio). * if the process wins the race, it calls bioset_exit before bio_put(bio) is executed. * bio_put(bio) attempts to free a bio into a destroyed bio set - causing a crash in mempool_free. We fix this bug by moving bio_put before atomic_dec_and_test. We also move rdev_dec_pending before atomic_dec_and_test as suggested by Neil Brown. The function md_end_flush has a similar bug - we must call bio_put before we decrement the number of in-progress bios. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 11557f0067 P4D 11557f0067 PUD 0 Oops: 0002 [#1] PREEMPT SMP CPU: 0 PID: 73 Comm: kworker/0:1 Not tainted 6.1.0-rc3 #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Workqueue: kdelayd flush_expired_bios [dm_delay] RIP: 0010:mempool_free+0x47/0x80 Code: 48 89 ef 5b 5d ff e0 f3 c3 48 89 f7 e8 32 45 3f 00 48 63 53 08 48 89 c6 3b 53 04 7d 2d 48 8b 43 10 8d 4a 01 48 89 df 89 4b 08 <48> 89 2c d0 e8 b0 45 3f 00 48 8d 7b 30 5b 5d 31 c9 ba 01 00 00 00 RSP: 0018:ffff88910036bda8 EFLAGS: 00010093 RAX: 0000000000000000 RBX: ffff8891037b65d8 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff8891037b65d8 RBP: ffff8891447ba240 R08: 0000000000012908 R09: 00000000003d0900 R10: 0000000000000000 R11: 0000000000173544 R12: ffff889101a14000 R13: ffff8891562ac300 R14: ffff889102b41440 R15: ffffe8ffffa00d05 FS: 0000000000000000(0000) GS:ffff88942fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000001102e99000 CR4: 00000000000006b0 Call Trace: clone_endio+0xf4/0x1c0 [dm_mod] clone_endio+0xf4/0x1c0 [dm_mod] __submit_bio+0x76/0x120 submit_bio_noacct_nocheck+0xb6/0x2a0 flush_expired_bios+0x28/0x2f [dm_delay] process_one_work+0x1b4/0x300 worker_thread+0x45/0x3e0 ? rescuer_thread+0x380/0x380 kthread+0xc2/0x100 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Modules linked in: brd dm_delay dm_raid dm_mod af_packet uvesafb cfbfillrect cfbimgblt cn cfbcopyarea fb font fbdev tun autofs4 binfmt_misc configfs ipv6 virtio_rng virtio_balloon rng_core virtio_net pcspkr net_failover failover qemu_fw_cfg button mousedev raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod sd_mod t10_pi crc64_rocksoft crc64 virtio_scsi scsi_mod evdev psmouse bsg scsi_common [last unloaded: brd] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Song Liu --- drivers/md/md.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 6f3b2c1cb6cd..f95b86440485 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -521,13 +521,14 @@ static void md_end_flush(struct bio *bio) struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; + bio_put(bio); + rdev_dec_pending(rdev, mddev); if (atomic_dec_and_test(&mddev->flush_pending)) { /* The pre-request flush has finished */ queue_work(md_wq, &mddev->flush_work); } - bio_put(bio); } static void md_submit_flush_data(struct work_struct *ws); @@ -925,10 +926,12 @@ static void super_written(struct bio *bio) } else clear_bit(LastDev, &rdev->flags); + bio_put(bio); + + rdev_dec_pending(rdev, mddev); + if (atomic_dec_and_test(&mddev->pending_writes)) wake_up(&mddev->sb_wait); - rdev_dec_pending(rdev, mddev); - bio_put(bio); } void md_super_write(struct mddev *mddev, struct md_rdev *rdev, -- cgit From ad831a16b08c3f1a1f28a56d2054313d7d521da9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Nov 2022 11:10:37 +0100 Subject: md/raid5: use bdev_write_cache instead of open coding it Use the bdev_write_cache instead of two equivalent open coded checks. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/raid5-cache.c | 5 +---- drivers/md/raid5-ppl.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index a63023aae21e..46182b955aef 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -3062,7 +3062,6 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev) int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { - struct request_queue *q = bdev_get_queue(rdev->bdev); struct r5l_log *log; int ret; @@ -3091,9 +3090,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log) return -ENOMEM; log->rdev = rdev; - - log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0; - + log->need_cache_flush = bdev_write_cache(rdev->bdev); log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, sizeof(rdev->mddev->uuid)); diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 31b9157bc9ae..e495939bb3e0 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1301,8 +1301,6 @@ static int ppl_validate_rdev(struct md_rdev *rdev) static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) { - struct request_queue *q; - if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + PPL_HEADER_SIZE) * 2) { log->use_multippl = true; @@ -1316,8 +1314,7 @@ static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev) } log->next_io_sector = rdev->ppl.sector; - q = bdev_get_queue(rdev->bdev); - if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + if (bdev_write_cache(rdev->bdev)) log->wb_cache_on = true; } -- cgit From b611ad14006e5be2170d9e8e611bf49dff288911 Mon Sep 17 00:00:00 2001 From: Jiang Li Date: Mon, 7 Nov 2022 22:16:59 +0800 Subject: md/raid1: stop mdx_raid1 thread when raid1 array run failed fail run raid1 array when we assemble array with the inactive disk only, but the mdx_raid1 thread were not stop, Even if the associated resources have been released. it will caused a NULL dereference when we do poweroff. This causes the following Oops: [ 287.587787] BUG: kernel NULL pointer dereference, address: 0000000000000070 [ 287.594762] #PF: supervisor read access in kernel mode [ 287.599912] #PF: error_code(0x0000) - not-present page [ 287.605061] PGD 0 P4D 0 [ 287.607612] Oops: 0000 [#1] SMP NOPTI [ 287.611287] CPU: 3 PID: 5265 Comm: md0_raid1 Tainted: G U 5.10.146 #0 [ 287.619029] Hardware name: xxxxxxx/To be filled by O.E.M, BIOS 5.19 06/16/2022 [ 287.626775] RIP: 0010:md_check_recovery+0x57/0x500 [md_mod] [ 287.632357] Code: fe 01 00 00 48 83 bb 10 03 00 00 00 74 08 48 89 ...... [ 287.651118] RSP: 0018:ffffc90000433d78 EFLAGS: 00010202 [ 287.656347] RAX: 0000000000000000 RBX: ffff888105986800 RCX: 0000000000000000 [ 287.663491] RDX: ffffc90000433bb0 RSI: 00000000ffffefff RDI: ffff888105986800 [ 287.670634] RBP: ffffc90000433da0 R08: 0000000000000000 R09: c0000000ffffefff [ 287.677771] R10: 0000000000000001 R11: ffffc90000433ba8 R12: ffff888105986800 [ 287.684907] R13: 0000000000000000 R14: fffffffffffffe00 R15: ffff888100b6b500 [ 287.692052] FS: 0000000000000000(0000) GS:ffff888277f80000(0000) knlGS:0000000000000000 [ 287.700149] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 287.705897] CR2: 0000000000000070 CR3: 000000000320a000 CR4: 0000000000350ee0 [ 287.713033] Call Trace: [ 287.715498] raid1d+0x6c/0xbbb [raid1] [ 287.719256] ? __schedule+0x1ff/0x760 [ 287.722930] ? schedule+0x3b/0xb0 [ 287.726260] ? schedule_timeout+0x1ed/0x290 [ 287.730456] ? __switch_to+0x11f/0x400 [ 287.734219] md_thread+0xe9/0x140 [md_mod] [ 287.738328] ? md_thread+0xe9/0x140 [md_mod] [ 287.742601] ? wait_woken+0x80/0x80 [ 287.746097] ? md_register_thread+0xe0/0xe0 [md_mod] [ 287.751064] kthread+0x11a/0x140 [ 287.754300] ? kthread_park+0x90/0x90 [ 287.757974] ret_from_fork+0x1f/0x30 In fact, when raid1 array run fail, we need to do md_unregister_thread() before raid1_free(). Signed-off-by: Jiang Li Signed-off-by: Song Liu --- drivers/md/raid1.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 05d8438cfec8..58f705f42948 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3159,6 +3159,7 @@ static int raid1_run(struct mddev *mddev) * RAID1 needs at least one disk in active */ if (conf->raid_disks - mddev->degraded < 1) { + md_unregister_thread(&conf->thread); ret = -EINVAL; goto abort; } -- cgit From 992ec6a92ac315d3301353ff3beb818fcc34e4e4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 22:10:46 +0800 Subject: dm: remove free_table_devices free_table_devices just warns and frees all table_device structures when the target removal did not remove them. This should never happen, but if it did, just freeing the structure without deleting them from the list or cleaning up the resources would not help at all. So just WARN on a non-empty list instead. Signed-off-by: Christoph Hellwig Signed-off-by: Yu Kuai Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221115141054.1051801-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 95a1ee3d314e..19d25bf997be 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -833,19 +833,6 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) mutex_unlock(&md->table_devices_lock); } -static void free_table_devices(struct list_head *devices) -{ - struct list_head *tmp, *next; - - list_for_each_safe(tmp, next, devices) { - struct table_device *td = list_entry(tmp, struct table_device, list); - - DMWARN("dm_destroy: %s still exists with %d references", - td->dm_dev.name, refcount_read(&td->count)); - kfree(td); - } -} - /* * Get the geometry associated with a dm device */ @@ -2122,7 +2109,7 @@ static void free_dev(struct mapped_device *md) cleanup_mapped_device(md); - free_table_devices(&md->table_devices); + WARN_ON_ONCE(!list_empty(&md->table_devices)); dm_stats_cleanup(&md->stats); free_minor(minor); -- cgit From b9a785d2dc6567b2fd9fc60057a6a945a276927a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 22:10:47 +0800 Subject: dm: cleanup open_table_device Move all the logic for allocation the table_device and linking it into the list into the open_table_device. This keeps the code tidy and ensures that the table_devices only exist in fully initialized state. Signed-off-by: Christoph Hellwig Signed-off-by: Yu Kuai Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221115141054.1051801-4-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 56 +++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 19d25bf997be..28d7581b6a82 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -732,28 +732,41 @@ static char *_dm_claim_ptr = "I belong to device-mapper"; /* * Open a table device so we can use it as a map destination. */ -static int open_table_device(struct table_device *td, dev_t dev, - struct mapped_device *md) +static struct table_device *open_table_device(struct mapped_device *md, + dev_t dev, fmode_t mode) { + struct table_device *td; struct block_device *bdev; u64 part_off; int r; - BUG_ON(td->dm_dev.bdev); + td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); + if (!td) + return ERR_PTR(-ENOMEM); + refcount_set(&td->count, 1); - bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr); + if (IS_ERR(bdev)) { + r = PTR_ERR(bdev); + goto out_free_td; + } r = bd_link_disk_holder(bdev, dm_disk(md)); - if (r) { - blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL); - return r; - } + if (r) + goto out_blkdev_put; + td->dm_dev.mode = mode; td->dm_dev.bdev = bdev; td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL); - return 0; + format_dev_t(td->dm_dev.name, dev); + list_add(&td->list, &md->table_devices); + return td; + +out_blkdev_put: + blkdev_put(bdev, mode | FMODE_EXCL); +out_free_td: + kfree(td); + return ERR_PTR(r); } /* @@ -786,31 +799,16 @@ static struct table_device *find_table_device(struct list_head *l, dev_t dev, int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, struct dm_dev **result) { - int r; struct table_device *td; mutex_lock(&md->table_devices_lock); td = find_table_device(&md->table_devices, dev, mode); if (!td) { - td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); - if (!td) { - mutex_unlock(&md->table_devices_lock); - return -ENOMEM; - } - - td->dm_dev.mode = mode; - td->dm_dev.bdev = NULL; - - if ((r = open_table_device(td, dev, md))) { + td = open_table_device(md, dev, mode); + if (IS_ERR(td)) { mutex_unlock(&md->table_devices_lock); - kfree(td); - return r; + return PTR_ERR(td); } - - format_dev_t(td->dm_dev.name, dev); - - refcount_set(&td->count, 1); - list_add(&td->list, &md->table_devices); } else { refcount_inc(&td->count); } -- cgit From 7b5865831c1003122f737df5e16adaa583f1a595 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 22:10:48 +0800 Subject: dm: cleanup close_table_device Take the list unlink and free into close_table_device so that no half torn down table_devices exist. Also remove the check for a NULL bdev as that can't happen - open_table_device never adds a table_device to the list that does not have a valid block_device. Signed-off-by: Christoph Hellwig Signed-off-by: Yu Kuai Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221115141054.1051801-5-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 28d7581b6a82..2917700b1e15 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -774,14 +774,11 @@ out_free_td: */ static void close_table_device(struct table_device *td, struct mapped_device *md) { - if (!td->dm_dev.bdev) - return; - bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); put_dax(td->dm_dev.dax_dev); - td->dm_dev.bdev = NULL; - td->dm_dev.dax_dev = NULL; + list_del(&td->list); + kfree(td); } static struct table_device *find_table_device(struct list_head *l, dev_t dev, @@ -823,11 +820,8 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) struct table_device *td = container_of(d, struct table_device, dm_dev); mutex_lock(&md->table_devices_lock); - if (refcount_dec_and_test(&td->count)) { + if (refcount_dec_and_test(&td->count)) close_table_device(td, md); - list_del(&td->list); - kfree(td); - } mutex_unlock(&md->table_devices_lock); } -- cgit From d563792c8933a810d28ce0f2831f0726c2b15a31 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 15 Nov 2022 22:10:49 +0800 Subject: dm: make sure create and remove dm device won't race with open and close table open_table_device() and close_table_device() is protected by table_devices_lock, hence use it to protect add_disk() and del_gendisk(). Prepare to track per-add_disk holder relations in dm. Signed-off-by: Yu Kuai Reviewed-by: Mike Snitzer Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221115141054.1051801-6-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2917700b1e15..3728b56b364b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1952,7 +1952,14 @@ static void cleanup_mapped_device(struct mapped_device *md) spin_unlock(&_minor_lock); if (dm_get_md_type(md) != DM_TYPE_NONE) { dm_sysfs_exit(md); + + /* + * Hold lock to make sure del_gendisk() won't concurrent + * with open/close_table_device(). + */ + mutex_lock(&md->table_devices_lock); del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); } dm_queue_destroy_crypto_profile(md->queue); put_disk(md->disk); @@ -2312,15 +2319,24 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) if (r) return r; + /* + * Hold lock to make sure add_disk() and del_gendisk() won't concurrent + * with open_table_device() and close_table_device(). + */ + mutex_lock(&md->table_devices_lock); r = add_disk(md->disk); + mutex_unlock(&md->table_devices_lock); if (r) return r; r = dm_sysfs_init(md); if (r) { + mutex_lock(&md->table_devices_lock); del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); return r; } + md->type = type; return 0; } -- cgit From 1a581b72169968f4154b5793828f3bc28b258b35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 22:10:50 +0800 Subject: dm: track per-add_disk holder relations in DM dm is a bit special in that it opens the underlying devices. Commit 89f871af1b26 ("dm: delay registering the gendisk") tried to accommodate that by allowing to add the holder to the list before add_gendisk and then just add them to sysfs once add_disk is called. But that leads to really odd lifetime problems and error handling problems as we can't know the state of the kobjects and don't unwind properly. To fix this switch to just registering all existing table_devices with the holder code right after add_disk, and remove them before calling del_gendisk. Fixes: 89f871af1b26 ("dm: delay registering the gendisk") Reported-by: Yu Kuai Signed-off-by: Christoph Hellwig Signed-off-by: Yu Kuai Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221115141054.1051801-7-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/md/dm.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3728b56b364b..e1ea3a7bd9d9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -751,9 +751,16 @@ static struct table_device *open_table_device(struct mapped_device *md, goto out_free_td; } - r = bd_link_disk_holder(bdev, dm_disk(md)); - if (r) - goto out_blkdev_put; + /* + * We can be called before the dm disk is added. In that case we can't + * register the holder relation here. It will be done once add_disk was + * called. + */ + if (md->disk->slave_dir) { + r = bd_link_disk_holder(bdev, md->disk); + if (r) + goto out_blkdev_put; + } td->dm_dev.mode = mode; td->dm_dev.bdev = bdev; @@ -774,7 +781,8 @@ out_free_td: */ static void close_table_device(struct table_device *td, struct mapped_device *md) { - bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md)); + if (md->disk->slave_dir) + bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); put_dax(td->dm_dev.dax_dev); list_del(&td->list); @@ -1951,7 +1959,13 @@ static void cleanup_mapped_device(struct mapped_device *md) md->disk->private_data = NULL; spin_unlock(&_minor_lock); if (dm_get_md_type(md) != DM_TYPE_NONE) { + struct table_device *td; + dm_sysfs_exit(md); + list_for_each_entry(td, &md->table_devices, list) { + bd_unlink_disk_holder(td->dm_dev.bdev, + md->disk); + } /* * Hold lock to make sure del_gendisk() won't concurrent @@ -2291,6 +2305,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) { enum dm_queue_mode type = dm_table_get_type(t); struct queue_limits limits; + struct table_device *td; int r; switch (type) { @@ -2329,16 +2344,30 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) if (r) return r; - r = dm_sysfs_init(md); - if (r) { - mutex_lock(&md->table_devices_lock); - del_gendisk(md->disk); - mutex_unlock(&md->table_devices_lock); - return r; + /* + * Register the holder relationship for devices added before the disk + * was live. + */ + list_for_each_entry(td, &md->table_devices, list) { + r = bd_link_disk_holder(td->dm_dev.bdev, md->disk); + if (r) + goto out_undo_holders; } + r = dm_sysfs_init(md); + if (r) + goto out_undo_holders; + md->type = type; return 0; + +out_undo_holders: + list_for_each_entry_continue_reverse(td, &md->table_devices, list) + bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); + mutex_lock(&md->table_devices_lock); + del_gendisk(md->disk); + mutex_unlock(&md->table_devices_lock); + return r; } struct mapped_device *dm_get_md(dev_t dev) -- cgit From fce3caea0f241f5d34855c82c399d5e0e2d91f07 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Nov 2022 05:29:42 +0100 Subject: blk-crypto: don't use struct request_queue for public interfaces Switch all public blk-crypto interfaces to use struct block_device arguments to specify the device they operate on instead of th request_queue, which is a block layer implementation detail. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Link: https://lore.kernel.org/r/20221114042944.1009870-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 078da18bb86d..8541d5688f3a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1215,7 +1215,7 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, struct dm_keyslot_evict_args *args = data; int err; - err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key); + err = blk_crypto_evict_key(dev->bdev, args->key); if (!args->err) args->err = err; /* Always try to evict the key from all devices. */ -- cgit From fb541ca4c36500b7b1d1a4def992059d9af56f14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Nov 2022 14:32:53 +0100 Subject: md: remove lock_bdev / unlock_bdev These wrappers for blkdev_get / blkdev_put just horribly confuse the code with their odd naming. Remove them and improve the error unwinding in md_import_device with the now folded code. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/md.c | 63 ++++++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index f95b86440485..27ce98b018af 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2493,34 +2493,6 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) queue_work(md_rdev_misc_wq, &rdev->del_work); } -/* - * prevent the device from being mounted, repartitioned or - * otherwise reused by a RAID array (or any other kernel - * subsystem), by bd_claiming the device. - */ -static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared) -{ - int err = 0; - struct block_device *bdev; - - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - shared ? (struct md_rdev *)lock_rdev : rdev); - if (IS_ERR(bdev)) { - pr_warn("md: could not open device unknown-block(%u,%u).\n", - MAJOR(dev), MINOR(dev)); - return PTR_ERR(bdev); - } - rdev->bdev = bdev; - return err; -} - -static void unlock_rdev(struct md_rdev *rdev) -{ - struct block_device *bdev = rdev->bdev; - rdev->bdev = NULL; - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - void md_autodetect_dev(dev_t dev); static void export_rdev(struct md_rdev *rdev) @@ -2531,7 +2503,8 @@ static void export_rdev(struct md_rdev *rdev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - unlock_rdev(rdev); + blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -3675,9 +3648,10 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - int err; + static struct md_rdev *claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; + int err; rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); if (!rdev) @@ -3685,14 +3659,20 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe err = md_rdev_init(rdev); if (err) - goto abort_free; + goto out_free_rdev; err = alloc_disk_sb(rdev); if (err) - goto abort_free; + goto out_clear_rdev; - err = lock_rdev(rdev, newdev, super_format == -2); - if (err) - goto abort_free; + rdev->bdev = blkdev_get_by_dev(newdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, + super_format == -2 ? claim_rdev : rdev); + if (IS_ERR(rdev->bdev)) { + pr_warn("md: could not open device unknown-block(%u,%u).\n", + MAJOR(newdev), MINOR(newdev)); + err = PTR_ERR(rdev->bdev); + goto out_clear_rdev; + } kobject_init(&rdev->kobj, &rdev_ktype); @@ -3701,7 +3681,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe pr_warn("md: %pg has zero or unknown size, marking faulty!\n", rdev->bdev); err = -EINVAL; - goto abort_free; + goto out_blkdev_put; } if (super_format >= 0) { @@ -3711,21 +3691,22 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe pr_warn("md: %pg does not have a valid v%d.%d superblock, not importing!\n", rdev->bdev, super_format, super_minor); - goto abort_free; + goto out_blkdev_put; } if (err < 0) { pr_warn("md: could not read %pg's sb, not importing!\n", rdev->bdev); - goto abort_free; + goto out_blkdev_put; } } return rdev; -abort_free: - if (rdev->bdev) - unlock_rdev(rdev); +out_blkdev_put: + blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); +out_clear_rdev: md_rdev_clear(rdev); +out_free_rdev: kfree(rdev); return ERR_PTR(err); } -- cgit From d57d9d6965502fd0ca95f17180d655f6dc196002 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Nov 2022 14:32:54 +0100 Subject: md: mark md_kick_rdev_from_array static md_kick_rdev_from_array is only used in md.c, so unexport it and mark the symbol static. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/md.c | 3 +-- drivers/md/md.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 27ce98b018af..94adb4039465 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2508,12 +2508,11 @@ static void export_rdev(struct md_rdev *rdev) kobject_put(&rdev->kobj); } -void md_kick_rdev_from_array(struct md_rdev *rdev) +static void md_kick_rdev_from_array(struct md_rdev *rdev) { unbind_rdev_from_array(rdev); export_rdev(rdev); } -EXPORT_SYMBOL_GPL(md_kick_rdev_from_array); static void export_array(struct mddev *mddev) { diff --git a/drivers/md/md.h b/drivers/md/md.h index b4e2d8b87b61..554a9026669a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -782,7 +782,6 @@ extern void mddev_resume(struct mddev *mddev); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); -extern void md_kick_rdev_from_array(struct md_rdev * rdev); extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev, bool is_suspend); extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev, -- cgit From b5c1acf012a7a73e3d0c5c3605ececcca6797267 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Nov 2022 14:32:55 +0100 Subject: md: fold unbind_rdev_from_array into md_kick_rdev_from_array unbind_rdev_from_array is only called from md_kick_rdev_from_array, so merge it into its only caller. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/md.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 94adb4039465..775f1dde190a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2468,7 +2468,22 @@ static void rdev_delayed_delete(struct work_struct *ws) kobject_put(&rdev->kobj); } -static void unbind_rdev_from_array(struct md_rdev *rdev) +void md_autodetect_dev(dev_t dev); + +static void export_rdev(struct md_rdev *rdev) +{ + pr_debug("md: export_rdev(%pg)\n", rdev->bdev); + md_rdev_clear(rdev); +#ifndef MODULE + if (test_bit(AutoDetected, &rdev->flags)) + md_autodetect_dev(rdev->bdev->bd_dev); +#endif + blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + rdev->bdev = NULL; + kobject_put(&rdev->kobj); +} + +static void md_kick_rdev_from_array(struct md_rdev *rdev) { bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); @@ -2491,26 +2506,6 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) INIT_WORK(&rdev->del_work, rdev_delayed_delete); kobject_get(&rdev->kobj); queue_work(md_rdev_misc_wq, &rdev->del_work); -} - -void md_autodetect_dev(dev_t dev); - -static void export_rdev(struct md_rdev *rdev) -{ - pr_debug("md: export_rdev(%pg)\n", rdev->bdev); - md_rdev_clear(rdev); -#ifndef MODULE - if (test_bit(AutoDetected, &rdev->flags)) - md_autodetect_dev(rdev->bdev->bd_dev); -#endif - blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - rdev->bdev = NULL; - kobject_put(&rdev->kobj); -} - -static void md_kick_rdev_from_array(struct md_rdev *rdev) -{ - unbind_rdev_from_array(rdev); export_rdev(rdev); } -- cgit From c34b7ac65087554627f4840f4ecd6f2107a68fd1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Dec 2022 15:40:57 +0100 Subject: block: remove bio_set_op_attrs This macro is obsolete, so replace the last few uses with open coded bi_opf assignments. Signed-off-by: Christoph Hellwig Acked-by: Coly Li > Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20221206144057.720846-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/bcache/movinggc.c | 2 +- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/writeback.c | 4 ++-- drivers/md/dm-thin.c | 2 +- drivers/md/raid1.c | 12 ++++++------ drivers/md/raid10.c | 18 +++++++++--------- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 99499d1f6e66..9f32901fdad1 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -160,7 +160,7 @@ static void read_moving(struct cache_set *c) moving_init(io); bio = &io->bio.bio; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; bio->bi_end_io = read_moving_endio; if (bch_bio_alloc_pages(bio, GFP_KERNEL)) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3427555b0cca..39c7b607f8aa 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -244,7 +244,7 @@ static void bch_data_insert_start(struct closure *cl) trace_bcache_cache_insert(k); bch_keylist_push(&op->insert_keys); - bio_set_op_attrs(n, REQ_OP_WRITE, 0); + n->bi_opf = REQ_OP_WRITE; bch_submit_bbio(n, op->c, k, 0); } while (n != bio); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 0285b676e983..d4a5fc0650bb 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -434,7 +434,7 @@ static void write_dirty(struct closure *cl) */ if (KEY_DIRTY(&w->key)) { dirty_init(w); - bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); + io->bio.bi_opf = REQ_OP_WRITE; io->bio.bi_iter.bi_sector = KEY_START(&w->key); bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; @@ -547,7 +547,7 @@ static void read_dirty(struct cached_dev *dc) io->sequence = sequence++; dirty_init(w); - bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); + io->bio.bi_opf = REQ_OP_READ; io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); bio_set_dev(&io->bio, dc->disk.c->cache->bdev); io->bio.bi_end_io = read_dirty_endio; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e76c96c760a9..c2b5a537f5b8 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -410,7 +410,7 @@ static void end_discard(struct discard_op *op, int r) * need to wait for the chain to complete. */ bio_chain(op->bio, op->parent_bio); - bio_set_op_attrs(op->bio, REQ_OP_DISCARD, 0); + op->bio->bi_opf = REQ_OP_DISCARD; submit_bio(op->bio); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 58f705f42948..68a9e2d9985b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1321,7 +1321,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_end_io = raid1_end_read_request; - bio_set_op_attrs(read_bio, op, do_sync); + read_bio->bi_opf = op | do_sync; if (test_bit(FailFast, &mirror->rdev->flags) && test_bit(R1BIO_FailFast, &r1_bio->state)) read_bio->bi_opf |= MD_FAILFAST; @@ -2254,7 +2254,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) continue; } - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + wbio->bi_opf = REQ_OP_WRITE; if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) wbio->bi_opf |= MD_FAILFAST; @@ -2419,7 +2419,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) GFP_NOIO, &mddev->bio_set); } - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + wbio->bi_opf = REQ_OP_WRITE; wbio->bi_iter.bi_sector = r1_bio->sector; wbio->bi_iter.bi_size = r1_bio->sectors << 9; @@ -2770,7 +2770,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (i < conf->raid_disks) still_degraded = 1; } else if (!test_bit(In_sync, &rdev->flags)) { - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else { @@ -2797,7 +2797,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (disk < 0) disk = i; } - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; bio->bi_end_io = end_sync_read; read_targets++; } else if (!test_bit(WriteErrorSeen, &rdev->flags) && @@ -2809,7 +2809,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * if we are doing resync or repair. Otherwise, leave * this device alone for this sync request. */ - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_end_io = end_sync_write; write_targets++; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9a6503f5cb98..6c66357f92f5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1254,7 +1254,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); read_bio->bi_end_io = raid10_end_read_request; - bio_set_op_attrs(read_bio, op, do_sync); + read_bio->bi_opf = op | do_sync; if (test_bit(FailFast, &rdev->flags) && test_bit(R10BIO_FailFast, &r10_bio->state)) read_bio->bi_opf |= MD_FAILFAST; @@ -1301,7 +1301,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); mbio->bi_end_io = raid10_end_write_request; - bio_set_op_attrs(mbio, op, do_sync | do_fua); + mbio->bi_opf = op | do_sync | do_fua; if (!replacement && test_bit(FailFast, &conf->mirrors[devnum].rdev->flags) && enough(conf, devnum)) @@ -2933,7 +2933,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + choose_data_offset(r10_bio, rdev); - bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); + wbio->bi_opf = REQ_OP_WRITE; if (submit_bio_wait(wbio) < 0) /* Failure! */ @@ -3542,7 +3542,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_read; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; from_addr = r10_bio->devs[j].addr; @@ -3567,7 +3567,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_iter.bi_sector = to_addr + mrdev->data_offset; bio_set_dev(bio, mrdev->bdev); @@ -3588,7 +3588,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; bio->bi_iter.bi_sector = to_addr + mreplace->data_offset; bio_set_dev(bio, mreplace->bdev); @@ -3742,7 +3742,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_read; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; @@ -3764,7 +3764,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio->bi_next = biolist; biolist = bio; bio->bi_end_io = end_sync_write; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; @@ -4970,7 +4970,7 @@ read_more: b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; b->bi_end_io = end_reshape_write; - bio_set_op_attrs(b, REQ_OP_WRITE, 0); + b->bi_opf = REQ_OP_WRITE; b->bi_next = blist; blist = b; } -- cgit