summaryrefslogtreecommitdiff
path: root/drivers/md/dm-mpath.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-mpath.c')
-rw-r--r-- drivers/md/dm-mpath.c 393
1 files changed, 264 insertions, 129 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 0e325469a252..aaf4a0a4b0eb 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2003 Sistina Software Limited.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
@@ -27,9 +28,11 @@
#include <linux/atomic.h>
#include <linux/blk-mq.h>
+static struct workqueue_struct *dm_mpath_wq;
+
#define DM_MSG_PREFIX "multipath"
#define DM_PG_INIT_DELAY_MSECS 2000
-#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
+#define DM_PG_INIT_DELAY_DEFAULT ((unsigned int) -1)
#define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0
static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT;
@@ -39,7 +42,7 @@ struct pgpath {
struct list_head list;
struct priority_group *pg; /* Owning PG */
- unsigned fail_count; /* Cumulative failure count */
+ unsigned int fail_count; /* Cumulative failure count */
struct dm_path path;
struct delayed_work activate_path;
@@ -59,8 +62,8 @@ struct priority_group {
struct multipath *m; /* Owning multipath instance */
struct path_selector ps;
- unsigned pg_num; /* Reference number */
- unsigned nr_pgpaths; /* Number of paths in PG */
+ unsigned int pg_num; /* Reference number */
+ unsigned int nr_pgpaths; /* Number of paths in PG */
struct list_head pgpaths;
bool bypassed:1; /* Temporarily bypass this PG? */
@@ -76,16 +79,18 @@ struct multipath {
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
+ struct priority_group *last_probed_pg;
atomic_t nr_valid_paths; /* Total number of usable paths */
- unsigned nr_priority_groups;
+ unsigned int nr_priority_groups;
struct list_head priority_groups;
const char *hw_handler_name;
char *hw_handler_params;
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
- unsigned pg_init_retries; /* Number of times to retry pg_init */
- unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
+ wait_queue_head_t probe_wait; /* Wait for probing paths */
+ unsigned int pg_init_retries; /* Number of times to retry pg_init */
+ unsigned int pg_init_delay_msecs; /* Number of msecs before pg_init retry */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
atomic_t pg_init_count; /* Number of times pg_init called */
@@ -97,6 +102,7 @@ struct multipath {
struct bio_list queued_bios;
struct timer_list nopath_timer; /* Timeout for queue_if_no_path */
+ bool is_suspending;
};
/*
@@ -117,10 +123,11 @@ static void activate_path_work(struct work_struct *work);
static void process_queued_bios(struct work_struct *work);
static void queue_if_no_path_timeout_work(struct timer_list *t);
-/*-----------------------------------------------
+/*
+ *-----------------------------------------------
* Multipath state flags.
- *-----------------------------------------------*/
-
+ *-----------------------------------------------
+ */
#define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */
#define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */
@@ -128,6 +135,8 @@ static void queue_if_no_path_timeout_work(struct timer_list *t);
#define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */
+#define MPATHF_DELAY_PG_SWITCH 7 /* Delay switching pg if it still has paths */
+#define MPATHF_NEED_PG_SWITCH 8 /* Need to switch pgs after the delay has ended */
static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
{
@@ -135,6 +144,7 @@ static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
if (r) {
unsigned long flags;
+
spin_lock_irqsave(&m->lock, flags);
r = test_bit(MPATHF_bit, &m->flags);
spin_unlock_irqrestore(&m->lock, flags);
@@ -143,10 +153,11 @@ static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
return r;
}
-/*-----------------------------------------------
+/*
+ *-----------------------------------------------
* Allocation routines
- *-----------------------------------------------*/
-
+ *-----------------------------------------------
+ */
static struct pgpath *alloc_pgpath(void)
{
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
@@ -248,6 +259,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
+ init_waitqueue_head(&m->probe_wait);
return 0;
}
@@ -302,10 +314,11 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp
dm_bio_record(bio_details, bio);
}
-/*-----------------------------------------------
+/*
+ *-----------------------------------------------
* Path selection
- *-----------------------------------------------*/
-
+ *-----------------------------------------------
+ */
static int __pg_init_all_paths(struct multipath *m)
{
struct pgpath *pgpath;
@@ -397,7 +410,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned long flags;
struct priority_group *pg;
struct pgpath *pgpath;
- unsigned bypassed = 1;
+ unsigned int bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
spin_lock_irqsave(&m->lock, flags);
@@ -406,13 +419,21 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
goto failed;
}
+ /* Don't change PG until it has no remaining paths */
+ pg = READ_ONCE(m->current_pg);
+ if (pg) {
+ pgpath = choose_path_in_pg(m, pg, nr_bytes);
+ if (!IS_ERR_OR_NULL(pgpath))
+ return pgpath;
+ }
+
/* Were we instructed to switch PG? */
if (READ_ONCE(m->next_pg)) {
spin_lock_irqsave(&m->lock, flags);
pg = m->next_pg;
if (!pg) {
spin_unlock_irqrestore(&m->lock, flags);
- goto check_current_pg;
+ goto check_all_pgs;
}
m->next_pg = NULL;
spin_unlock_irqrestore(&m->lock, flags);
@@ -420,16 +441,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
if (!IS_ERR_OR_NULL(pgpath))
return pgpath;
}
-
- /* Don't change PG until it has no remaining paths */
-check_current_pg:
- pg = READ_ONCE(m->current_pg);
- if (pg) {
- pgpath = choose_path_in_pg(m, pg, nr_bytes);
- if (!IS_ERR_OR_NULL(pgpath))
- return pgpath;
- }
-
+check_all_pgs:
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
@@ -467,13 +479,11 @@ failed:
* it has been invoked.
*/
#define dm_report_EIO(m) \
-do { \
DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \
dm_table_device_name((m)->ti->table), \
test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
- dm_noflush_suspending((m)->ti)); \
-} while (0)
+ dm_noflush_suspending((m)->ti))
/*
* Check whether bios must be queued in the device-mapper core rather
@@ -607,7 +617,6 @@ static void multipath_queue_bio(struct multipath *m, struct bio *bio)
static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
- unsigned long flags;
/* Do we need to select a new pgpath? */
pgpath = READ_ONCE(m->current_pgpath);
@@ -615,12 +624,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if (!pgpath) {
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
__multipath_queue_bio(m, bio);
pgpath = ERR_PTR(-EAGAIN);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
} else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
@@ -683,7 +692,6 @@ static void process_queued_io_list(struct multipath *m)
static void process_queued_bios(struct work_struct *work)
{
int r;
- unsigned long flags;
struct bio *bio;
struct bio_list bios;
struct blk_plug plug;
@@ -692,21 +700,21 @@ static void process_queued_bios(struct work_struct *work)
bio_list_init(&bios);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (bio_list_empty(&m->queued_bios)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
return;
}
- bio_list_merge(&bios, &m->queued_bios);
- bio_list_init(&m->queued_bios);
+ bio_list_merge_init(&bios, &m->queued_bios);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+
dm_bio_restore(get_bio_details_from_mpio(mpio), bio);
r = __multipath_map_bio(m, bio, mpio);
switch (r) {
@@ -733,15 +741,15 @@ static void process_queued_bios(struct work_struct *work)
/*
* If we run out of usable paths, should we queue I/O or error it?
*/
-static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
+static int queue_if_no_path(struct multipath *m, bool f_queue_if_no_path,
bool save_old_value, const char *caller)
{
unsigned long flags;
bool queue_if_no_path_bit, saved_queue_if_no_path_bit;
const char *dm_dev_name = dm_table_device_name(m->ti->table);
- DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d",
- dm_dev_name, __func__, caller, queue_if_no_path, save_old_value);
+ DMDEBUG("%s: %s caller=%s f_queue_if_no_path=%d save_old_value=%d",
+ dm_dev_name, __func__, caller, f_queue_if_no_path, save_old_value);
spin_lock_irqsave(&m->lock, flags);
@@ -754,11 +762,11 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
dm_dev_name);
} else
assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit);
- } else if (!queue_if_no_path && saved_queue_if_no_path_bit) {
+ } else if (!f_queue_if_no_path && saved_queue_if_no_path_bit) {
/* due to "fail_if_no_path" message, need to honor it. */
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
}
- assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
+ assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, f_queue_if_no_path);
DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d",
dm_dev_name, __func__,
@@ -768,7 +776,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
spin_unlock_irqrestore(&m->lock, flags);
- if (!queue_if_no_path) {
+ if (!f_queue_if_no_path) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
@@ -782,7 +790,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
*/
static void queue_if_no_path_timeout_work(struct timer_list *t)
{
- struct multipath *m = from_timer(m, t, nopath_timer);
+ struct multipath *m = timer_container_of(m, t, nopath_timer);
DMWARN("queue_if_no_path timeout on %s, failing queued IO",
dm_table_device_name(m->ti->table));
@@ -810,7 +818,7 @@ static void enable_nopath_timeout(struct multipath *m)
static void disable_nopath_timeout(struct multipath *m)
{
- del_timer_sync(&m->nopath_timer);
+ timer_delete_sync(&m->nopath_timer);
}
/*
@@ -825,7 +833,8 @@ static void trigger_event(struct work_struct *work)
dm_table_event(m->ti->table);
}
-/*-----------------------------------------------------------------
+/*
+ *---------------------------------------------------------------
* Constructor/argument parsing:
* <#multipath feature args> [<arg>]*
* <#hw_handler args> [hw_handler [<arg>]*]
@@ -834,13 +843,14 @@ static void trigger_event(struct work_struct *work)
* [<selector> <#selector args> [<arg>]*
* <#paths> <#per-path selector args>
* [<path> [<arg>]* ]+ ]+
- *---------------------------------------------------------------*/
+ *---------------------------------------------------------------
+ */
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
struct dm_target *ti)
{
int r;
struct path_selector_type *pst;
- unsigned ps_argc;
+ unsigned int ps_argc;
static const struct dm_arg _args[] = {
{0, 1024, "invalid number of path selector args"},
@@ -983,7 +993,7 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as,
};
int r;
- unsigned i, nr_selector_args, nr_args;
+ unsigned int i, nr_selector_args, nr_args;
struct priority_group *pg;
struct dm_target *ti = m->ti;
@@ -1049,7 +1059,7 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as,
static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
- unsigned hw_argc;
+ unsigned int hw_argc;
int ret;
struct dm_target *ti = m->ti;
@@ -1086,7 +1096,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
goto fail;
}
j = sprintf(p, "%d", hw_argc - 1);
- for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
+ for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
j = sprintf(p, "%s", as->argv[i]);
}
dm_consume_args(as, hw_argc - 1);
@@ -1101,7 +1111,7 @@ fail:
static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
int r;
- unsigned argc;
+ unsigned int argc;
struct dm_target *ti = m->ti;
const char *arg_name;
@@ -1170,7 +1180,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
return r;
}
-static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
+static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
/* target arguments */
static const struct dm_arg _args[] = {
@@ -1181,9 +1191,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
int r;
struct multipath *m;
struct dm_arg_set as;
- unsigned pg_count = 0;
- unsigned next_pg_num;
- unsigned long flags;
+ unsigned int pg_count = 0;
+ unsigned int next_pg_num;
as.argc = argc;
as.argv = argv;
@@ -1224,7 +1233,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
/* parse the priority groups */
while (as.argc) {
struct priority_group *pg;
- unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
+ unsigned int nr_valid_paths = atomic_read(&m->nr_valid_paths);
pg = parse_priority_group(&as, m);
if (IS_ERR(pg)) {
@@ -1248,9 +1257,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
@@ -1285,23 +1294,21 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
static void flush_multipath_work(struct multipath *m)
{
if (m->hw_handler_name) {
- unsigned long flags;
-
if (!atomic_read(&m->pg_init_in_progress))
goto skip;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (atomic_read(&m->pg_init_in_progress) &&
!test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
skip:
if (m->queue_mode == DM_TYPE_BIO_BASED)
@@ -1347,7 +1354,7 @@ static int fail_path(struct pgpath *pgpath)
dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
- schedule_work(&m->trigger_event);
+ queue_work(dm_mpath_wq, &m->trigger_event);
enable_nopath_timeout(m);
@@ -1363,11 +1370,10 @@ out:
static int reinstate_path(struct pgpath *pgpath)
{
int r = 0, run_queue = 0;
- unsigned long flags;
struct multipath *m = pgpath->pg->m;
- unsigned nr_valid_paths;
+ unsigned int nr_valid_paths;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (pgpath->is_active)
goto out;
@@ -1397,7 +1403,7 @@ static int reinstate_path(struct pgpath *pgpath)
schedule_work(&m->trigger_event);
out:
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
@@ -1412,8 +1418,7 @@ out:
/*
* Fail or reinstate all paths that match the provided struct dm_dev.
*/
-static int action_dev(struct multipath *m, struct dm_dev *dev,
- action_fn action)
+static int action_dev(struct multipath *m, dev_t dev, action_fn action)
{
int r = -EINVAL;
struct pgpath *pgpath;
@@ -1421,7 +1426,7 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
list_for_each_entry(pg, &m->priority_groups, list) {
list_for_each_entry(pgpath, &pg->pgpaths, list) {
- if (pgpath->path.dev == dev)
+ if (pgpath->path.dev->bdev->bd_dev == dev)
r = action(pgpath);
}
}
@@ -1433,15 +1438,19 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
* Temporarily try to avoid having to use the specified PG
*/
static void bypass_pg(struct multipath *m, struct priority_group *pg,
- bool bypassed)
+ bool bypassed, bool can_be_delayed)
{
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
pg->bypassed = bypassed;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (can_be_delayed && test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags))
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
spin_unlock_irqrestore(&m->lock, flags);
@@ -1454,27 +1463,30 @@ static void bypass_pg(struct multipath *m, struct priority_group *pg,
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
struct priority_group *pg;
- unsigned pgnum;
- unsigned long flags;
+ unsigned int pgnum;
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
- DMWARN("invalid PG number supplied to switch_pg_num");
+ DMWARN("invalid PG number supplied to %s", __func__);
return -EINVAL;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
list_for_each_entry(pg, &m->priority_groups, list) {
pg->bypassed = false;
if (--pgnum)
continue;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags))
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
m->next_pg = pg;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
schedule_work(&m->trigger_event);
return 0;
@@ -1487,7 +1499,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
{
struct priority_group *pg;
- unsigned pgnum;
+ unsigned int pgnum;
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
@@ -1501,7 +1513,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
break;
}
- bypass_pg(m, pg, bypassed);
+ bypass_pg(m, pg, bypassed, true);
return 0;
}
@@ -1555,7 +1567,7 @@ static void pg_init_done(void *data, int errors)
* Probably doing something like FW upgrade on the
* controller so try the other pg.
*/
- bypass_pg(m, pg, true);
+ bypass_pg(m, pg, true, false);
break;
case SCSI_DH_RETRY:
/* Wait before retrying. */
@@ -1736,6 +1748,9 @@ static void multipath_presuspend(struct dm_target *ti)
{
struct multipath *m = ti->private;
+ spin_lock_irq(&m->lock);
+ m->is_suspending = true;
+ spin_unlock_irq(&m->lock);
/* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
queue_if_no_path(m, false, true, __func__);
@@ -1756,9 +1771,9 @@ static void multipath_postsuspend(struct dm_target *ti)
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
+ m->is_suspending = false;
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
@@ -1769,7 +1784,7 @@ static void multipath_resume(struct dm_target *ti)
test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
/*
@@ -1789,17 +1804,16 @@ static void multipath_resume(struct dm_target *ti)
* num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
*/
static void multipath_status(struct dm_target *ti, status_type_t type,
- unsigned status_flags, char *result, unsigned maxlen)
+ unsigned int status_flags, char *result, unsigned int maxlen)
{
int sz = 0, pg_counter, pgpath_counter;
- unsigned long flags;
struct multipath *m = ti->private;
struct priority_group *pg;
struct pgpath *p;
- unsigned pg_num;
+ unsigned int pg_num;
char state;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
/* Features */
if (type == STATUSTYPE_INFO)
@@ -1821,7 +1835,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
DMEMIT("retain_attached_hw_handler ");
if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
- switch(m->queue_mode) {
+ switch (m->queue_mode) {
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
@@ -1839,10 +1853,10 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u ", m->nr_priority_groups);
- if (m->next_pg)
- pg_num = m->next_pg->pg_num;
- else if (m->current_pg)
+ if (m->current_pg)
pg_num = m->current_pg->pg_num;
+ else if (m->next_pg)
+ pg_num = m->next_pg->pg_num;
else
pg_num = (m->nr_priority_groups ? 1 : 0);
@@ -1945,17 +1959,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
break;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
-static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
- char *result, unsigned maxlen)
+static int multipath_message(struct dm_target *ti, unsigned int argc, char **argv,
+ char *result, unsigned int maxlen)
{
int r = -EINVAL;
- struct dm_dev *dev;
+ dev_t dev;
struct multipath *m = ti->private;
action_fn action;
- unsigned long flags;
mutex_lock(&m->work_mutex);
@@ -1967,9 +1980,9 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
if (argc == 1) {
if (!strcasecmp(argv[0], "queue_if_no_path")) {
r = queue_if_no_path(m, true, false, __func__);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
goto out;
} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
r = queue_if_no_path(m, false, false, __func__);
@@ -2001,7 +2014,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
goto out;
}
- r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
+ r = dm_devt_from_path(argv[1], &dev);
if (r) {
DMWARN("message: error getting device %s",
argv[1]);
@@ -2010,21 +2023,137 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
r = action_dev(m, dev, action);
- dm_put_device(ti, dev);
-
out:
mutex_unlock(&m->work_mutex);
return r;
}
+/*
+ * Perform a minimal read from the given path to find out whether the
+ * path still works. If a path error occurs, fail it.
+ *
+ * The read covers one logical block of the path's block device, starting
+ * at sector 0, into a temporary page that is freed before returning.
+ *
+ * Returns 0 after submit_bio_wait() has returned, whether or not the read
+ * succeeded: an I/O error is not passed back to the caller and only leads
+ * to fail_path() when blk_path_error() reports it as a path error.
+ * Returns -EINVAL if the logical block size is larger than PAGE_SIZE, and
+ * -ENOMEM if the page or the bio cannot be allocated.
+ */
+static int probe_path(struct pgpath *pgpath)
+{
+ struct block_device *bdev = pgpath->path.dev->bdev;
+ unsigned int read_size = bdev_logical_block_size(bdev);
+ struct page *page;
+ struct bio *bio;
+ blk_status_t status;
+ int r = 0;
+
+ if (WARN_ON_ONCE(read_size > PAGE_SIZE)) /* the read buffer is a single page */
+ return -EINVAL;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ /* Perform a minimal read: Sector 0, length read_size */
+ bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);
+ if (!bio) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ bio->bi_iter.bi_sector = 0;
+ __bio_add_page(bio, page, read_size, 0);
+ submit_bio_wait(bio);
+ status = bio->bi_status; /* copy the result out before the bio reference is dropped */
+ bio_put(bio);
+
+ if (status && blk_path_error(status))
+ fail_path(pgpath);
+
+out:
+ __free_page(page);
+ return r;
+}
+
+/*
+ * Probe all active paths in current_pg to find out whether they still work.
+ * Fail all paths that do not work.
+ *
+ * Return -ENOTCONN if no valid path is left (even outside of current_pg). We
+ * cannot probe paths in other pgs without switching current_pg, so if valid
+ * paths are only in different pgs, they may or may not work. Additionally
+ * we should not probe paths in a pathgroup that is in the process of
+ * initializing. Userspace can submit a request and we'll switch and wait
+ * for the pathgroup to be initialized. If the request fails, it may need to
+ * probe again.
+ *
+ * A negative error from probe_path() ends the loop early and is returned
+ * as is. Otherwise the result is 0 while nr_valid_paths is non-zero.
+ *
+ * Nothing is probed when there is no current_pg, when is_suspending is
+ * set, or when MPATHF_QUEUE_IO is set.
+ *
+ * MPATHF_DELAY_PG_SWITCH is set for the duration of the probe loop and
+ * cleared afterwards; a caller that finds it already set sleeps on
+ * probe_wait until it has been cleared. If MPATHF_NEED_PG_SWITCH is found
+ * set once the loop has ended, current_pgpath and current_pg are reset
+ * here.
+ */
+static int probe_active_paths(struct multipath *m)
+{
+ struct pgpath *pgpath;
+ struct priority_group *pg = NULL;
+ int r = 0;
+
+ spin_lock_irq(&m->lock);
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ wait_event_lock_irq(m->probe_wait,
+ !test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
+ m->lock);
+ /*
+ * If we waited because a probe was already in progress,
+ * and it probed the current active pathgroup, don't
+ * reprobe. Just report whether any valid path is left.
+ */
+ if (m->current_pg == m->last_probed_pg)
+ goto skip_probe;
+ }
+ if (!m->current_pg || m->is_suspending ||
+ test_bit(MPATHF_QUEUE_IO, &m->flags))
+ goto skip_probe;
+ set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ pg = m->last_probed_pg = m->current_pg;
+ spin_unlock_irq(&m->lock);
+
+ list_for_each_entry(pgpath, &pg->pgpaths, list) {
+ if (pg != READ_ONCE(m->current_pg) ||
+ READ_ONCE(m->is_suspending))
+ goto out; /* current_pg changed or a suspend began: stop probing */
+ if (!pgpath->is_active)
+ continue; /* only active paths are probed */
+
+ r = probe_path(pgpath);
+ if (r < 0)
+ goto out;
+ }
+
+out:
+ spin_lock_irq(&m->lock);
+ clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
+skip_probe:
+ if (r == 0 && !atomic_read(&m->nr_valid_paths))
+ r = -ENOTCONN;
+ spin_unlock_irq(&m->lock);
+ if (pg) /* non-NULL only when this call set MPATHF_DELAY_PG_SWITCH */
+ wake_up(&m->probe_wait);
+ return r;
+}
+
static int multipath_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev)
+ struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
- unsigned long flags;
int r;
+ if (_IOC_TYPE(cmd) == DM_IOCTL) {
+ *forward = false;
+ switch (cmd) {
+ case DM_MPATH_PROBE_PATHS:
+ return probe_active_paths(m);
+ default:
+ return -ENOTTY;
+ }
+ }
+
pgpath = READ_ONCE(m->current_pgpath);
if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
pgpath = choose_pgpath(m, 0);
@@ -2040,10 +2169,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
} else {
/* No path is available */
r = -EIO;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
r = -ENOTCONN;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
if (r == -ENOTCONN) {
@@ -2051,10 +2180,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
/* Path status changed, redo selection */
(void) choose_pgpath(m, 0);
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
(void) __pg_init_all_paths(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
@@ -2116,6 +2245,7 @@ static int multipath_busy(struct dm_target *ti)
/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
if (!atomic_read(&m->nr_valid_paths)) {
unsigned long flags;
+
spin_lock_irqsave(&m->lock, flags);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
spin_unlock_irqrestore(&m->lock, flags);
@@ -2168,12 +2298,14 @@ static int multipath_busy(struct dm_target *ti)
return busy;
}
-/*-----------------------------------------------------------------
+/*
+ *---------------------------------------------------------------
* Module setup
- *---------------------------------------------------------------*/
+ *---------------------------------------------------------------
+ */
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
@@ -2196,12 +2328,11 @@ static struct target_type multipath_target = {
static int __init dm_multipath_init(void)
{
- int r;
+ int r = -ENOMEM;
kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
if (!kmultipathd) {
DMERR("failed to create workqueue kmpathd");
- r = -ENOMEM;
goto bad_alloc_kmultipathd;
}
@@ -2215,20 +2346,24 @@ static int __init dm_multipath_init(void)
WQ_MEM_RECLAIM);
if (!kmpath_handlerd) {
DMERR("failed to create workqueue kmpath_handlerd");
- r = -ENOMEM;
goto bad_alloc_kmpath_handlerd;
}
+ dm_mpath_wq = alloc_workqueue("dm_mpath_wq", 0, 0);
+ if (!dm_mpath_wq) {
+ DMERR("failed to create workqueue dm_mpath_wq");
+ goto bad_alloc_dm_mpath_wq;
+ }
+
r = dm_register_target(&multipath_target);
- if (r < 0) {
- DMERR("request-based register failed %d", r);
- r = -EINVAL;
+ if (r < 0)
goto bad_register_target;
- }
return 0;
bad_register_target:
+ destroy_workqueue(dm_mpath_wq);
+bad_alloc_dm_mpath_wq:
destroy_workqueue(kmpath_handlerd);
bad_alloc_kmpath_handlerd:
destroy_workqueue(kmultipathd);
@@ -2238,6 +2373,7 @@ bad_alloc_kmultipathd:
static void __exit dm_multipath_exit(void)
{
+ destroy_workqueue(dm_mpath_wq);
destroy_workqueue(kmpath_handlerd);
destroy_workqueue(kmultipathd);
@@ -2247,10 +2383,9 @@ static void __exit dm_multipath_exit(void)
module_init(dm_multipath_init);
module_exit(dm_multipath_exit);
-module_param_named(queue_if_no_path_timeout_secs,
- queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR);
+module_param_named(queue_if_no_path_timeout_secs, queue_if_no_path_timeout_secs, ulong, 0644);
MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds");
MODULE_DESCRIPTION(DM_NAME " multipath target");
-MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
+MODULE_AUTHOR("Sistina Software <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");