diff options
Diffstat (limited to 'drivers/md/dm-mpath.c')
| -rw-r--r-- | drivers/md/dm-mpath.c | 393 |
1 files changed, 264 insertions, 129 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 0e325469a252..aaf4a0a4b0eb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2003 Sistina Software Limited. * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. @@ -27,9 +28,11 @@ #include <linux/atomic.h> #include <linux/blk-mq.h> +static struct workqueue_struct *dm_mpath_wq; + #define DM_MSG_PREFIX "multipath" #define DM_PG_INIT_DELAY_MSECS 2000 -#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) +#define DM_PG_INIT_DELAY_DEFAULT ((unsigned int) -1) #define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0 static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; @@ -39,7 +42,7 @@ struct pgpath { struct list_head list; struct priority_group *pg; /* Owning PG */ - unsigned fail_count; /* Cumulative failure count */ + unsigned int fail_count; /* Cumulative failure count */ struct dm_path path; struct delayed_work activate_path; @@ -59,8 +62,8 @@ struct priority_group { struct multipath *m; /* Owning multipath instance */ struct path_selector ps; - unsigned pg_num; /* Reference number */ - unsigned nr_pgpaths; /* Number of paths in PG */ + unsigned int pg_num; /* Reference number */ + unsigned int nr_pgpaths; /* Number of paths in PG */ struct list_head pgpaths; bool bypassed:1; /* Temporarily bypass this PG? */ @@ -76,16 +79,18 @@ struct multipath { struct pgpath *current_pgpath; struct priority_group *current_pg; struct priority_group *next_pg; /* Switch to this PG if set */ + struct priority_group *last_probed_pg; atomic_t nr_valid_paths; /* Total number of usable paths */ - unsigned nr_priority_groups; + unsigned int nr_priority_groups; struct list_head priority_groups; const char *hw_handler_name; char *hw_handler_params; wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ - unsigned pg_init_retries; /* Number of times to retry pg_init */ - unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ + wait_queue_head_t probe_wait; /* Wait for probing paths */ + unsigned int pg_init_retries; /* Number of times to retry pg_init */ + unsigned int pg_init_delay_msecs; /* Number of msecs before pg_init retry */ atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ atomic_t pg_init_count; /* Number of times pg_init called */ @@ -97,6 +102,7 @@ struct multipath { struct bio_list queued_bios; struct timer_list nopath_timer; /* Timeout for queue_if_no_path */ + bool is_suspending; }; /* @@ -117,10 +123,11 @@ static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); static void queue_if_no_path_timeout_work(struct timer_list *t); -/*----------------------------------------------- +/* + *----------------------------------------------- * Multipath state flags. - *-----------------------------------------------*/ - + *----------------------------------------------- + */ #define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */ #define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */ #define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */ @@ -128,6 +135,8 @@ static void queue_if_no_path_timeout_work(struct timer_list *t); #define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */ #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ +#define MPATHF_DELAY_PG_SWITCH 7 /* Delay switching pg if it still has paths */ +#define MPATHF_NEED_PG_SWITCH 8 /* Need to switch pgs after the delay has ended */ static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m) { @@ -135,6 +144,7 @@ static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m) if (r) { unsigned long flags; + spin_lock_irqsave(&m->lock, flags); r = test_bit(MPATHF_bit, &m->flags); spin_unlock_irqrestore(&m->lock, flags); @@ -143,10 +153,11 @@ static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m) return r; } -/*----------------------------------------------- +/* + *----------------------------------------------- * Allocation routines - *-----------------------------------------------*/ - + *----------------------------------------------- + */ static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); @@ -248,6 +259,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) atomic_set(&m->pg_init_count, 0); m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; init_waitqueue_head(&m->pg_init_wait); + init_waitqueue_head(&m->probe_wait); return 0; } @@ -302,10 +314,11 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp dm_bio_record(bio_details, bio); } -/*----------------------------------------------- +/* + *----------------------------------------------- * Path selection - *-----------------------------------------------*/ - + *----------------------------------------------- + */ static int __pg_init_all_paths(struct multipath *m) { struct pgpath *pgpath; @@ -397,7 +410,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned long flags; struct priority_group *pg; struct pgpath *pgpath; - unsigned bypassed = 1; + unsigned int bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { spin_lock_irqsave(&m->lock, flags); @@ -406,13 +419,21 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) goto failed; } + /* Don't change PG until it has no remaining paths */ + pg = READ_ONCE(m->current_pg); + if (pg) { + pgpath = choose_path_in_pg(m, pg, nr_bytes); + if (!IS_ERR_OR_NULL(pgpath)) + return pgpath; + } + /* Were we instructed to switch PG? */ if (READ_ONCE(m->next_pg)) { spin_lock_irqsave(&m->lock, flags); pg = m->next_pg; if (!pg) { spin_unlock_irqrestore(&m->lock, flags); - goto check_current_pg; + goto check_all_pgs; } m->next_pg = NULL; spin_unlock_irqrestore(&m->lock, flags); @@ -420,16 +441,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) if (!IS_ERR_OR_NULL(pgpath)) return pgpath; } - - /* Don't change PG until it has no remaining paths */ -check_current_pg: - pg = READ_ONCE(m->current_pg); - if (pg) { - pgpath = choose_path_in_pg(m, pg, nr_bytes); - if (!IS_ERR_OR_NULL(pgpath)) - return pgpath; - } - +check_all_pgs: /* * Loop through priority groups until we find a valid path. * First time we skip PGs marked 'bypassed'. @@ -467,13 +479,11 @@ failed: * it has been invoked. */ #define dm_report_EIO(m) \ -do { \ DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \ dm_table_device_name((m)->ti->table), \ test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ - dm_noflush_suspending((m)->ti)); \ -} while (0) + dm_noflush_suspending((m)->ti)) /* * Check whether bios must be queued in the device-mapper core rather @@ -607,7 +617,6 @@ static void multipath_queue_bio(struct multipath *m, struct bio *bio) static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) { struct pgpath *pgpath; - unsigned long flags; /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); @@ -615,12 +624,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) pgpath = choose_pgpath(m, bio->bi_iter.bi_size); if (!pgpath) { - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { __multipath_queue_bio(m, bio); pgpath = ERR_PTR(-EAGAIN); } - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { @@ -683,7 +692,6 @@ static void process_queued_io_list(struct multipath *m) static void process_queued_bios(struct work_struct *work) { int r; - unsigned long flags; struct bio *bio; struct bio_list bios; struct blk_plug plug; @@ -692,21 +700,21 @@ static void process_queued_bios(struct work_struct *work) bio_list_init(&bios); - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); if (bio_list_empty(&m->queued_bios)) { - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); return; } - bio_list_merge(&bios, &m->queued_bios); - bio_list_init(&m->queued_bios); + bio_list_merge_init(&bios, &m->queued_bios); - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); blk_start_plug(&plug); while ((bio = bio_list_pop(&bios))) { struct dm_mpath_io *mpio = get_mpio_from_bio(bio); + dm_bio_restore(get_bio_details_from_mpio(mpio), bio); r = __multipath_map_bio(m, bio, mpio); switch (r) { @@ -733,15 +741,15 @@ static void process_queued_bios(struct work_struct *work) /* * If we run out of usable paths, should we queue I/O or error it? */ -static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, +static int queue_if_no_path(struct multipath *m, bool f_queue_if_no_path, bool save_old_value, const char *caller) { unsigned long flags; bool queue_if_no_path_bit, saved_queue_if_no_path_bit; const char *dm_dev_name = dm_table_device_name(m->ti->table); - DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d", - dm_dev_name, __func__, caller, queue_if_no_path, save_old_value); + DMDEBUG("%s: %s caller=%s f_queue_if_no_path=%d save_old_value=%d", + dm_dev_name, __func__, caller, f_queue_if_no_path, save_old_value); spin_lock_irqsave(&m->lock, flags); @@ -754,11 +762,11 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, dm_dev_name); } else assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit); - } else if (!queue_if_no_path && saved_queue_if_no_path_bit) { + } else if (!f_queue_if_no_path && saved_queue_if_no_path_bit) { /* due to "fail_if_no_path" message, need to honor it. */ clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); } - assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); + assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, f_queue_if_no_path); DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d", dm_dev_name, __func__, @@ -768,7 +776,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, spin_unlock_irqrestore(&m->lock, flags); - if (!queue_if_no_path) { + if (!f_queue_if_no_path) { dm_table_run_md_queue_async(m->ti->table); process_queued_io_list(m); } @@ -782,7 +790,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, */ static void queue_if_no_path_timeout_work(struct timer_list *t) { - struct multipath *m = from_timer(m, t, nopath_timer); + struct multipath *m = timer_container_of(m, t, nopath_timer); DMWARN("queue_if_no_path timeout on %s, failing queued IO", dm_table_device_name(m->ti->table)); @@ -810,7 +818,7 @@ static void enable_nopath_timeout(struct multipath *m) static void disable_nopath_timeout(struct multipath *m) { - del_timer_sync(&m->nopath_timer); + timer_delete_sync(&m->nopath_timer); } /* @@ -825,7 +833,8 @@ static void trigger_event(struct work_struct *work) dm_table_event(m->ti->table); } -/*----------------------------------------------------------------- +/* + *--------------------------------------------------------------- * Constructor/argument parsing: * <#multipath feature args> [<arg>]* * <#hw_handler args> [hw_handler [<arg>]*] @@ -834,13 +843,14 @@ static void trigger_event(struct work_struct *work) * [<selector> <#selector args> [<arg>]* * <#paths> <#per-path selector args> * [<path> [<arg>]* ]+ ]+ - *---------------------------------------------------------------*/ + *--------------------------------------------------------------- + */ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, struct dm_target *ti) { int r; struct path_selector_type *pst; - unsigned ps_argc; + unsigned int ps_argc; static const struct dm_arg _args[] = { {0, 1024, "invalid number of path selector args"}, @@ -983,7 +993,7 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, }; int r; - unsigned i, nr_selector_args, nr_args; + unsigned int i, nr_selector_args, nr_args; struct priority_group *pg; struct dm_target *ti = m->ti; @@ -1049,7 +1059,7 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) { - unsigned hw_argc; + unsigned int hw_argc; int ret; struct dm_target *ti = m->ti; @@ -1086,7 +1096,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) goto fail; } j = sprintf(p, "%d", hw_argc - 1); - for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) + for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1) j = sprintf(p, "%s", as->argv[i]); } dm_consume_args(as, hw_argc - 1); @@ -1101,7 +1111,7 @@ fail: static int parse_features(struct dm_arg_set *as, struct multipath *m) { int r; - unsigned argc; + unsigned int argc; struct dm_target *ti = m->ti; const char *arg_name; @@ -1170,7 +1180,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) return r; } -static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) +static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv) { /* target arguments */ static const struct dm_arg _args[] = { @@ -1181,9 +1191,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) int r; struct multipath *m; struct dm_arg_set as; - unsigned pg_count = 0; - unsigned next_pg_num; - unsigned long flags; + unsigned int pg_count = 0; + unsigned int next_pg_num; as.argc = argc; as.argv = argv; @@ -1224,7 +1233,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) /* parse the priority groups */ while (as.argc) { struct priority_group *pg; - unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths); + unsigned int nr_valid_paths = atomic_read(&m->nr_valid_paths); pg = parse_priority_group(&as, m); if (IS_ERR(pg)) { @@ -1248,9 +1257,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); enable_nopath_timeout(m); - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); ti->num_flush_bios = 1; ti->num_discard_bios = 1; @@ -1285,23 +1294,21 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m) static void flush_multipath_work(struct multipath *m) { if (m->hw_handler_name) { - unsigned long flags; - if (!atomic_read(&m->pg_init_in_progress)) goto skip; - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); if (atomic_read(&m->pg_init_in_progress) && !test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) { - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); flush_workqueue(kmpath_handlerd); multipath_wait_for_pg_init_completion(m); - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); } - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); } skip: if (m->queue_mode == DM_TYPE_BIO_BASED) @@ -1347,7 +1354,7 @@ static int fail_path(struct pgpath *pgpath) dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, pgpath->path.dev->name, atomic_read(&m->nr_valid_paths)); - schedule_work(&m->trigger_event); + queue_work(dm_mpath_wq, &m->trigger_event); enable_nopath_timeout(m); @@ -1363,11 +1370,10 @@ out: static int reinstate_path(struct pgpath *pgpath) { int r = 0, run_queue = 0; - unsigned long flags; struct multipath *m = pgpath->pg->m; - unsigned nr_valid_paths; + unsigned int nr_valid_paths; - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); if (pgpath->is_active) goto out; @@ -1397,7 +1403,7 @@ static int reinstate_path(struct pgpath *pgpath) schedule_work(&m->trigger_event); out: - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); if (run_queue) { dm_table_run_md_queue_async(m->ti->table); process_queued_io_list(m); @@ -1412,8 +1418,7 @@ out: /* * Fail or reinstate all paths that match the provided struct dm_dev. */ -static int action_dev(struct multipath *m, struct dm_dev *dev, - action_fn action) +static int action_dev(struct multipath *m, dev_t dev, action_fn action) { int r = -EINVAL; struct pgpath *pgpath; @@ -1421,7 +1426,7 @@ static int action_dev(struct multipath *m, struct dm_dev *dev, list_for_each_entry(pg, &m->priority_groups, list) { list_for_each_entry(pgpath, &pg->pgpaths, list) { - if (pgpath->path.dev == dev) + if (pgpath->path.dev->bdev->bd_dev == dev) r = action(pgpath); } } @@ -1433,15 +1438,19 @@ static int action_dev(struct multipath *m, struct dm_dev *dev, * Temporarily try to avoid having to use the specified PG */ static void bypass_pg(struct multipath *m, struct priority_group *pg, - bool bypassed) + bool bypassed, bool can_be_delayed) { unsigned long flags; spin_lock_irqsave(&m->lock, flags); pg->bypassed = bypassed; - m->current_pgpath = NULL; - m->current_pg = NULL; + if (can_be_delayed && test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) + set_bit(MPATHF_NEED_PG_SWITCH, &m->flags); + else { + m->current_pgpath = NULL; + m->current_pg = NULL; + } spin_unlock_irqrestore(&m->lock, flags); @@ -1454,27 +1463,30 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg, static int switch_pg_num(struct multipath *m, const char *pgstr) { struct priority_group *pg; - unsigned pgnum; - unsigned long flags; + unsigned int pgnum; char dummy; if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { - DMWARN("invalid PG number supplied to switch_pg_num"); + DMWARN("invalid PG number supplied to %s", __func__); return -EINVAL; } - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); list_for_each_entry(pg, &m->priority_groups, list) { pg->bypassed = false; if (--pgnum) continue; - m->current_pgpath = NULL; - m->current_pg = NULL; + if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) + set_bit(MPATHF_NEED_PG_SWITCH, &m->flags); + else { + m->current_pgpath = NULL; + m->current_pg = NULL; + } m->next_pg = pg; } - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); schedule_work(&m->trigger_event); return 0; @@ -1487,7 +1499,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr) static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed) { struct priority_group *pg; - unsigned pgnum; + unsigned int pgnum; char dummy; if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || @@ -1501,7 +1513,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed) break; } - bypass_pg(m, pg, bypassed); + bypass_pg(m, pg, bypassed, true); return 0; } @@ -1555,7 +1567,7 @@ static void pg_init_done(void *data, int errors) * Probably doing something like FW upgrade on the * controller so try the other pg. */ - bypass_pg(m, pg, true); + bypass_pg(m, pg, true, false); break; case SCSI_DH_RETRY: /* Wait before retrying. */ @@ -1736,6 +1748,9 @@ static void multipath_presuspend(struct dm_target *ti) { struct multipath *m = ti->private; + spin_lock_irq(&m->lock); + m->is_suspending = true; + spin_unlock_irq(&m->lock); /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */ if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti)) queue_if_no_path(m, false, true, __func__); @@ -1756,9 +1771,9 @@ static void multipath_postsuspend(struct dm_target *ti) static void multipath_resume(struct dm_target *ti) { struct multipath *m = ti->private; - unsigned long flags; - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); + m->is_suspending = false; if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) { set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); @@ -1769,7 +1784,7 @@ static void multipath_resume(struct dm_target *ti) test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)); - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); } /* @@ -1789,17 +1804,16 @@ static void multipath_resume(struct dm_target *ti) * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ static void multipath_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) + unsigned int status_flags, char *result, unsigned int maxlen) { int sz = 0, pg_counter, pgpath_counter; - unsigned long flags; struct multipath *m = ti->private; struct priority_group *pg; struct pgpath *p; - unsigned pg_num; + unsigned int pg_num; char state; - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); /* Features */ if (type == STATUSTYPE_INFO) @@ -1821,7 +1835,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type, if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) DMEMIT("retain_attached_hw_handler "); if (m->queue_mode != DM_TYPE_REQUEST_BASED) { - switch(m->queue_mode) { + switch (m->queue_mode) { case DM_TYPE_BIO_BASED: DMEMIT("queue_mode bio "); break; @@ -1839,10 +1853,10 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("%u ", m->nr_priority_groups); - if (m->next_pg) - pg_num = m->next_pg->pg_num; - else if (m->current_pg) + if (m->current_pg) pg_num = m->current_pg->pg_num; + else if (m->next_pg) + pg_num = m->next_pg->pg_num; else pg_num = (m->nr_priority_groups ? 1 : 0); @@ -1945,17 +1959,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type, break; } - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); } -static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, - char *result, unsigned maxlen) +static int multipath_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned int maxlen) { int r = -EINVAL; - struct dm_dev *dev; + dev_t dev; struct multipath *m = ti->private; action_fn action; - unsigned long flags; mutex_lock(&m->work_mutex); @@ -1967,9 +1980,9 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, if (argc == 1) { if (!strcasecmp(argv[0], "queue_if_no_path")) { r = queue_if_no_path(m, true, false, __func__); - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); enable_nopath_timeout(m); - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); goto out; } else if (!strcasecmp(argv[0], "fail_if_no_path")) { r = queue_if_no_path(m, false, false, __func__); @@ -2001,7 +2014,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, goto out; } - r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); + r = dm_devt_from_path(argv[1], &dev); if (r) { DMWARN("message: error getting device %s", argv[1]); @@ -2010,21 +2023,137 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, r = action_dev(m, dev, action); - dm_put_device(ti, dev); - out: mutex_unlock(&m->work_mutex); return r; } +/* + * Perform a minimal read from the given path to find out whether the + * path still works. If a path error occurs, fail it. + */ +static int probe_path(struct pgpath *pgpath) +{ + struct block_device *bdev = pgpath->path.dev->bdev; + unsigned int read_size = bdev_logical_block_size(bdev); + struct page *page; + struct bio *bio; + blk_status_t status; + int r = 0; + + if (WARN_ON_ONCE(read_size > PAGE_SIZE)) + return -EINVAL; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + /* Perform a minimal read: Sector 0, length read_size */ + bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL); + if (!bio) { + r = -ENOMEM; + goto out; + } + + bio->bi_iter.bi_sector = 0; + __bio_add_page(bio, page, read_size, 0); + submit_bio_wait(bio); + status = bio->bi_status; + bio_put(bio); + + if (status && blk_path_error(status)) + fail_path(pgpath); + +out: + __free_page(page); + return r; +} + +/* + * Probe all active paths in current_pg to find out whether they still work. + * Fail all paths that do not work. + * + * Return -ENOTCONN if no valid path is left (even outside of current_pg). We + * cannot probe paths in other pgs without switching current_pg, so if valid + * paths are only in different pgs, they may or may not work. Additionally + * we should not probe paths in a pathgroup that is in the process of + * Initializing. Userspace can submit a request and we'll switch and wait + * for the pathgroup to be initialized. If the request fails, it may need to + * probe again. + */ +static int probe_active_paths(struct multipath *m) +{ + struct pgpath *pgpath; + struct priority_group *pg = NULL; + int r = 0; + + spin_lock_irq(&m->lock); + if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) { + wait_event_lock_irq(m->probe_wait, + !test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags), + m->lock); + /* + * if we waited because a probe was already in progress, + * and it probed the current active pathgroup, don't + * reprobe. Just return the number of valid paths + */ + if (m->current_pg == m->last_probed_pg) + goto skip_probe; + } + if (!m->current_pg || m->is_suspending || + test_bit(MPATHF_QUEUE_IO, &m->flags)) + goto skip_probe; + set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags); + pg = m->last_probed_pg = m->current_pg; + spin_unlock_irq(&m->lock); + + list_for_each_entry(pgpath, &pg->pgpaths, list) { + if (pg != READ_ONCE(m->current_pg) || + READ_ONCE(m->is_suspending)) + goto out; + if (!pgpath->is_active) + continue; + + r = probe_path(pgpath); + if (r < 0) + goto out; + } + +out: + spin_lock_irq(&m->lock); + clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags); + if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) { + m->current_pgpath = NULL; + m->current_pg = NULL; + } +skip_probe: + if (r == 0 && !atomic_read(&m->nr_valid_paths)) + r = -ENOTCONN; + spin_unlock_irq(&m->lock); + if (pg) + wake_up(&m->probe_wait); + return r; +} + static int multipath_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev) + struct block_device **bdev, + unsigned int cmd, unsigned long arg, + bool *forward) { struct multipath *m = ti->private; struct pgpath *pgpath; - unsigned long flags; int r; + if (_IOC_TYPE(cmd) == DM_IOCTL) { + *forward = false; + switch (cmd) { + case DM_MPATH_PROBE_PATHS: + return probe_active_paths(m); + default: + return -ENOTTY; + } + } + pgpath = READ_ONCE(m->current_pgpath); if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) pgpath = choose_pgpath(m, 0); @@ -2040,10 +2169,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti, } else { /* No path is available */ r = -EIO; - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) r = -ENOTCONN; - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); } if (r == -ENOTCONN) { @@ -2051,10 +2180,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti, /* Path status changed, redo selection */ (void) choose_pgpath(m, 0); } - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) (void) __pg_init_all_paths(m); - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); dm_table_run_md_queue_async(m->ti->table); process_queued_io_list(m); } @@ -2116,6 +2245,7 @@ static int multipath_busy(struct dm_target *ti) /* no paths available, for blk-mq: rely on IO mapping to delay requeue */ if (!atomic_read(&m->nr_valid_paths)) { unsigned long flags; + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { spin_unlock_irqrestore(&m->lock, flags); @@ -2168,12 +2298,14 @@ static int multipath_busy(struct dm_target *ti) return busy; } -/*----------------------------------------------------------------- +/* + *--------------------------------------------------------------- * Module setup - *---------------------------------------------------------------*/ + *--------------------------------------------------------------- + */ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 14, 0}, + .version = {1, 15, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE | DM_TARGET_PASSES_INTEGRITY, .module = THIS_MODULE, @@ -2196,12 +2328,11 @@ static struct target_type multipath_target = { static int __init dm_multipath_init(void) { - int r; + int r = -ENOMEM; kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); if (!kmultipathd) { DMERR("failed to create workqueue kmpathd"); - r = -ENOMEM; goto bad_alloc_kmultipathd; } @@ -2215,20 +2346,24 @@ static int __init dm_multipath_init(void) WQ_MEM_RECLAIM); if (!kmpath_handlerd) { DMERR("failed to create workqueue kmpath_handlerd"); - r = -ENOMEM; goto bad_alloc_kmpath_handlerd; } + dm_mpath_wq = alloc_workqueue("dm_mpath_wq", 0, 0); + if (!dm_mpath_wq) { + DMERR("failed to create workqueue dm_mpath_wq"); + goto bad_alloc_dm_mpath_wq; + } + r = dm_register_target(&multipath_target); - if (r < 0) { - DMERR("request-based register failed %d", r); - r = -EINVAL; + if (r < 0) goto bad_register_target; - } return 0; bad_register_target: + destroy_workqueue(dm_mpath_wq); +bad_alloc_dm_mpath_wq: destroy_workqueue(kmpath_handlerd); bad_alloc_kmpath_handlerd: destroy_workqueue(kmultipathd); @@ -2238,6 +2373,7 @@ bad_alloc_kmultipathd: static void __exit dm_multipath_exit(void) { + destroy_workqueue(dm_mpath_wq); destroy_workqueue(kmpath_handlerd); destroy_workqueue(kmultipathd); @@ -2247,10 +2383,9 @@ static void __exit dm_multipath_exit(void) module_init(dm_multipath_init); module_exit(dm_multipath_exit); -module_param_named(queue_if_no_path_timeout_secs, - queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR); +module_param_named(queue_if_no_path_timeout_secs, queue_if_no_path_timeout_secs, ulong, 0644); MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds"); MODULE_DESCRIPTION(DM_NAME " multipath target"); -MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); +MODULE_AUTHOR("Sistina Software <dm-devel@lists.linux.dev>"); MODULE_LICENSE("GPL"); |
