summaryrefslogtreecommitdiff
path: root/drivers/scsi/device_handler/scsi_dh_alua.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/device_handler/scsi_dh_alua.c')
-rw-r--r--drivers/scsi/device_handler/scsi_dh_alua.c1477
1 files changed, 943 insertions, 534 deletions
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 68adb8955d2d..6fd89ae33059 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -1,41 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic SCSI-3 ALUA SCSI Device Handler
*
* Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
* All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
*/
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/module.h>
+#include <linux/unaligned.h>
#include <scsi/scsi.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_dbg.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_dh.h>
#define ALUA_DH_NAME "alua"
-#define ALUA_DH_VER "1.3"
-
-#define TPGS_STATE_OPTIMIZED 0x0
-#define TPGS_STATE_NONOPTIMIZED 0x1
-#define TPGS_STATE_STANDBY 0x2
-#define TPGS_STATE_UNAVAILABLE 0x3
-#define TPGS_STATE_LBA_DEPENDENT 0x4
-#define TPGS_STATE_OFFLINE 0xe
-#define TPGS_STATE_TRANSITIONING 0xf
+#define ALUA_DH_VER "2.0"
#define TPGS_SUPPORT_NONE 0x00
#define TPGS_SUPPORT_OPTIMIZED 0x01
@@ -45,6 +26,7 @@
#define TPGS_SUPPORT_LBA_DEPENDENT 0x10
#define TPGS_SUPPORT_OFFLINE 0x40
#define TPGS_SUPPORT_TRANSITION 0x80
+#define TPGS_SUPPORT_ALL 0xdf
#define RTPG_FMT_MASK 0x70
#define RTPG_FMT_EXT_HDR 0x10
@@ -54,27 +36,63 @@
#define TPGS_MODE_IMPLICIT 0x1
#define TPGS_MODE_EXPLICIT 0x2
-#define ALUA_INQUIRY_SIZE 36
+#define ALUA_RTPG_SIZE 128
#define ALUA_FAILOVER_TIMEOUT 60
#define ALUA_FAILOVER_RETRIES 5
+#define ALUA_RTPG_DELAY_MSECS 5
+#define ALUA_RTPG_RETRY_DELAY 2
-/* flags passed from user level */
-#define ALUA_OPTIMIZE_STPG 1
+/* device handler flags */
+#define ALUA_OPTIMIZE_STPG 0x01
+#define ALUA_RTPG_EXT_HDR_UNSUPP 0x02
+/* State machine flags */
+#define ALUA_PG_RUN_RTPG 0x10
+#define ALUA_PG_RUN_STPG 0x20
+#define ALUA_PG_RUNNING 0x40
-struct alua_dh_data {
+static uint optimize_stpg;
+module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
+
+static LIST_HEAD(port_group_list);
+static DEFINE_SPINLOCK(port_group_lock);
+static struct workqueue_struct *kaluad_wq;
+
+struct alua_port_group {
+ struct kref kref;
+ struct rcu_head rcu;
+ struct list_head node;
+ struct list_head dh_list;
+ unsigned char device_id_str[256];
+ int device_id_len;
int group_id;
- int rel_port;
int tpgs;
int state;
int pref;
+ int valid_states;
unsigned flags; /* used for optimizing STPG */
- unsigned char inq[ALUA_INQUIRY_SIZE];
- unsigned char *buff;
- int bufflen;
unsigned char transition_tmo;
- unsigned char sense[SCSI_SENSE_BUFFERSIZE];
- int senselen;
+ unsigned long expiry;
+ unsigned long interval;
+ struct delayed_work rtpg_work;
+ spinlock_t lock;
+ struct list_head rtpg_list;
+ struct scsi_device *rtpg_sdev;
+};
+
+struct alua_dh_data {
+ struct list_head node;
+ struct alua_port_group __rcu *pg;
+ int group_id;
+ spinlock_t pg_lock;
struct scsi_device *sdev;
+ int init_error;
+ struct mutex init_mutex;
+ bool disabled;
+};
+
+struct alua_queue_data {
+ struct list_head entry;
activate_complete callback_fn;
void *callback_data;
};
@@ -82,238 +100,168 @@ struct alua_dh_data {
#define ALUA_POLICY_SWITCH_CURRENT 0
#define ALUA_POLICY_SWITCH_ALL 1
-static char print_alua_state(int);
-static int alua_check_sense(struct scsi_device *, struct scsi_sense_hdr *);
-
-static inline struct alua_dh_data *get_alua_data(struct scsi_device *sdev)
-{
- struct scsi_dh_data *scsi_dh_data = sdev->scsi_dh_data;
- BUG_ON(scsi_dh_data == NULL);
- return ((struct alua_dh_data *) scsi_dh_data->buf);
-}
-
-static int realloc_buffer(struct alua_dh_data *h, unsigned len)
-{
- if (h->buff && h->buff != h->inq)
- kfree(h->buff);
-
- h->buff = kmalloc(len, GFP_NOIO);
- if (!h->buff) {
- h->buff = h->inq;
- h->bufflen = ALUA_INQUIRY_SIZE;
- return 1;
- }
- h->bufflen = len;
- return 0;
-}
+static void alua_rtpg_work(struct work_struct *work);
+static bool alua_rtpg_queue(struct alua_port_group *pg,
+ struct scsi_device *sdev,
+ struct alua_queue_data *qdata, bool force);
+static void alua_check(struct scsi_device *sdev, bool force);
-static struct request *get_alua_req(struct scsi_device *sdev,
- void *buffer, unsigned buflen, int rw)
+static void release_port_group(struct kref *kref)
{
- struct request *rq;
- struct request_queue *q = sdev->request_queue;
-
- rq = blk_get_request(q, rw, GFP_NOIO);
-
- if (!rq) {
- sdev_printk(KERN_INFO, sdev,
- "%s: blk_get_request failed\n", __func__);
- return NULL;
- }
-
- if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
- blk_put_request(rq);
- sdev_printk(KERN_INFO, sdev,
- "%s: blk_rq_map_kern failed\n", __func__);
- return NULL;
- }
-
- rq->cmd_type = REQ_TYPE_BLOCK_PC;
- rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
- REQ_FAILFAST_DRIVER;
- rq->retries = ALUA_FAILOVER_RETRIES;
- rq->timeout = ALUA_FAILOVER_TIMEOUT * HZ;
-
- return rq;
+ struct alua_port_group *pg;
+
+ pg = container_of(kref, struct alua_port_group, kref);
+ if (pg->rtpg_sdev)
+ flush_delayed_work(&pg->rtpg_work);
+ spin_lock(&port_group_lock);
+ list_del(&pg->node);
+ spin_unlock(&port_group_lock);
+ kfree_rcu(pg, rcu);
}
/*
- * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command
+ * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
* @sdev: sdev the command should be sent to
*/
-static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
+static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
+ int bufflen, struct scsi_sense_hdr *sshdr, int flags)
{
- struct request *rq;
- int err = SCSI_DH_RES_TEMP_UNAVAIL;
-
- rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
- if (!rq)
- goto done;
+ u8 cdb[MAX_COMMAND_SIZE];
+ blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
+ REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+ const struct scsi_exec_args exec_args = {
+ .sshdr = sshdr,
+ };
/* Prepare the command. */
- rq->cmd[0] = INQUIRY;
- rq->cmd[1] = 1;
- rq->cmd[2] = 0x83;
- rq->cmd[4] = h->bufflen;
- rq->cmd_len = COMMAND_SIZE(INQUIRY);
-
- rq->sense = h->sense;
- memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
- rq->sense_len = h->senselen = 0;
-
- err = blk_execute_rq(rq->q, NULL, rq, 1);
- if (err == -EIO) {
- sdev_printk(KERN_INFO, sdev,
- "%s: evpd inquiry failed with %x\n",
- ALUA_DH_NAME, rq->errors);
- h->senselen = rq->sense_len;
- err = SCSI_DH_IO;
- }
- blk_put_request(rq);
-done:
- return err;
+ memset(cdb, 0x0, MAX_COMMAND_SIZE);
+ cdb[0] = MAINTENANCE_IN;
+ if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
+ cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
+ else
+ cdb[1] = MI_REPORT_TARGET_PGS;
+ put_unaligned_be32(bufflen, &cdb[6]);
+
+ return scsi_execute_cmd(sdev, cdb, opf, buff, bufflen,
+ ALUA_FAILOVER_TIMEOUT * HZ,
+ ALUA_FAILOVER_RETRIES, &exec_args);
}
/*
- * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
- * @sdev: sdev the command should be sent to
+ * submit_stpg - Issue a SET TARGET PORT GROUP command
+ *
+ * Currently we're only setting the current target port group state
+ * to 'active/optimized' and let the array firmware figure out
+ * the states of the remaining groups.
*/
-static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h,
- bool rtpg_ext_hdr_req)
+static int submit_stpg(struct scsi_device *sdev, int group_id,
+ struct scsi_sense_hdr *sshdr)
{
- struct request *rq;
- int err = SCSI_DH_RES_TEMP_UNAVAIL;
+ u8 cdb[MAX_COMMAND_SIZE];
+ unsigned char stpg_data[8];
+ int stpg_len = 8;
+ blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV |
+ REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+ const struct scsi_exec_args exec_args = {
+ .sshdr = sshdr,
+ };
- rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
- if (!rq)
- goto done;
+ /* Prepare the data buffer */
+ memset(stpg_data, 0, stpg_len);
+ stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
+ put_unaligned_be16(group_id, &stpg_data[6]);
/* Prepare the command. */
- rq->cmd[0] = MAINTENANCE_IN;
- if (rtpg_ext_hdr_req)
- rq->cmd[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
- else
- rq->cmd[1] = MI_REPORT_TARGET_PGS;
- rq->cmd[6] = (h->bufflen >> 24) & 0xff;
- rq->cmd[7] = (h->bufflen >> 16) & 0xff;
- rq->cmd[8] = (h->bufflen >> 8) & 0xff;
- rq->cmd[9] = h->bufflen & 0xff;
- rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN);
-
- rq->sense = h->sense;
- memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
- rq->sense_len = h->senselen = 0;
-
- err = blk_execute_rq(rq->q, NULL, rq, 1);
- if (err == -EIO) {
- sdev_printk(KERN_INFO, sdev,
- "%s: rtpg failed with %x\n",
- ALUA_DH_NAME, rq->errors);
- h->senselen = rq->sense_len;
- err = SCSI_DH_IO;
- }
- blk_put_request(rq);
-done:
- return err;
+ memset(cdb, 0x0, MAX_COMMAND_SIZE);
+ cdb[0] = MAINTENANCE_OUT;
+ cdb[1] = MO_SET_TARGET_PGS;
+ put_unaligned_be32(stpg_len, &cdb[6]);
+
+ return scsi_execute_cmd(sdev, cdb, opf, stpg_data,
+ stpg_len, ALUA_FAILOVER_TIMEOUT * HZ,
+ ALUA_FAILOVER_RETRIES, &exec_args);
}
-/*
- * alua_stpg - Evaluate SET TARGET GROUP STATES
- * @sdev: the device to be evaluated
- * @state: the new target group state
- *
- * Send a SET TARGET GROUP STATES command to the device.
- * We only have to test here if we should resubmit the command;
- * any other error is assumed as a failure.
- */
-static void stpg_endio(struct request *req, int error)
+static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
+ int group_id)
{
- struct alua_dh_data *h = req->end_io_data;
- struct scsi_sense_hdr sense_hdr;
- unsigned err = SCSI_DH_OK;
+ struct alua_port_group *pg;
- if (host_byte(req->errors) != DID_OK ||
- msg_byte(req->errors) != COMMAND_COMPLETE) {
- err = SCSI_DH_IO;
- goto done;
- }
+ if (!id_str || !id_size || !strlen(id_str))
+ return NULL;
- if (req->sense_len > 0) {
- err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
- &sense_hdr);
- if (!err) {
- err = SCSI_DH_IO;
- goto done;
- }
- err = alua_check_sense(h->sdev, &sense_hdr);
- if (err == ADD_TO_MLQUEUE) {
- err = SCSI_DH_RETRY;
- goto done;
- }
- sdev_printk(KERN_INFO, h->sdev,
- "%s: stpg sense code: %02x/%02x/%02x\n",
- ALUA_DH_NAME, sense_hdr.sense_key,
- sense_hdr.asc, sense_hdr.ascq);
- err = SCSI_DH_IO;
- } else if (error)
- err = SCSI_DH_IO;
-
- if (err == SCSI_DH_OK) {
- h->state = TPGS_STATE_OPTIMIZED;
- sdev_printk(KERN_INFO, h->sdev,
- "%s: port group %02x switched to state %c\n",
- ALUA_DH_NAME, h->group_id,
- print_alua_state(h->state));
- }
-done:
- req->end_io_data = NULL;
- __blk_put_request(req->q, req);
- if (h->callback_fn) {
- h->callback_fn(h->callback_data, err);
- h->callback_fn = h->callback_data = NULL;
+ list_for_each_entry(pg, &port_group_list, node) {
+ if (pg->group_id != group_id)
+ continue;
+ if (!pg->device_id_len || pg->device_id_len != id_size)
+ continue;
+ if (strncmp(pg->device_id_str, id_str, id_size))
+ continue;
+ if (!kref_get_unless_zero(&pg->kref))
+ continue;
+ return pg;
}
- return;
+
+ return NULL;
}
/*
- * submit_stpg - Issue a SET TARGET GROUP STATES command
+ * alua_alloc_pg - Allocate a new port_group structure
+ * @sdev: scsi device
+ * @group_id: port group id
+ * @tpgs: target port group settings
*
- * Currently we're only setting the current target port group state
- * to 'active/optimized' and let the array firmware figure out
- * the states of the remaining groups.
+ * Allocate a new port_group structure for a given
+ * device.
*/
-static unsigned submit_stpg(struct alua_dh_data *h)
+static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
+ int group_id, int tpgs)
{
- struct request *rq;
- int stpg_len = 8;
- struct scsi_device *sdev = h->sdev;
+ struct alua_port_group *pg, *tmp_pg;
- /* Prepare the data buffer */
- memset(h->buff, 0, stpg_len);
- h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f;
- h->buff[6] = (h->group_id >> 8) & 0xff;
- h->buff[7] = h->group_id & 0xff;
+ pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
+ if (!pg)
+ return ERR_PTR(-ENOMEM);
- rq = get_alua_req(sdev, h->buff, stpg_len, WRITE);
- if (!rq)
- return SCSI_DH_RES_TEMP_UNAVAIL;
+ pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
+ sizeof(pg->device_id_str));
+ if (pg->device_id_len <= 0) {
+ /*
+ * TPGS supported but no device identification found.
+ * Generate private device identification.
+ */
+ sdev_printk(KERN_INFO, sdev,
+ "%s: No device descriptors found\n",
+ ALUA_DH_NAME);
+ pg->device_id_str[0] = '\0';
+ pg->device_id_len = 0;
+ }
+ pg->group_id = group_id;
+ pg->tpgs = tpgs;
+ pg->state = SCSI_ACCESS_STATE_OPTIMAL;
+ pg->valid_states = TPGS_SUPPORT_ALL;
+ if (optimize_stpg)
+ pg->flags |= ALUA_OPTIMIZE_STPG;
+ kref_init(&pg->kref);
+ INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
+ INIT_LIST_HEAD(&pg->rtpg_list);
+ INIT_LIST_HEAD(&pg->node);
+ INIT_LIST_HEAD(&pg->dh_list);
+ spin_lock_init(&pg->lock);
+
+ spin_lock(&port_group_lock);
+ tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
+ group_id);
+ if (tmp_pg) {
+ spin_unlock(&port_group_lock);
+ kfree(pg);
+ return tmp_pg;
+ }
- /* Prepare the command. */
- rq->cmd[0] = MAINTENANCE_OUT;
- rq->cmd[1] = MO_SET_TARGET_PGS;
- rq->cmd[6] = (stpg_len >> 24) & 0xff;
- rq->cmd[7] = (stpg_len >> 16) & 0xff;
- rq->cmd[8] = (stpg_len >> 8) & 0xff;
- rq->cmd[9] = stpg_len & 0xff;
- rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT);
-
- rq->sense = h->sense;
- memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
- rq->sense_len = h->senselen = 0;
- rq->end_io_data = h;
-
- blk_execute_rq_nowait(rq->q, NULL, rq, 1, stpg_endio);
- return SCSI_DH_OK;
+ list_add(&pg->node, &port_group_list);
+ spin_unlock(&port_group_lock);
+
+ return pg;
}
/*
@@ -323,12 +271,23 @@ static unsigned submit_stpg(struct alua_dh_data *h)
* Examine the TPGS setting of the sdev to find out if ALUA
* is supported.
*/
-static int alua_check_tpgs(struct scsi_device *sdev, struct alua_dh_data *h)
+static int alua_check_tpgs(struct scsi_device *sdev)
{
- int err = SCSI_DH_OK;
+ int tpgs = TPGS_MODE_NONE;
+
+ /*
+ * ALUA support for non-disk devices is fraught with
+ * difficulties, so disable it for now.
+ */
+ if (sdev->type != TYPE_DISK) {
+ sdev_printk(KERN_INFO, sdev,
+ "%s: disable for non-disk devices\n",
+ ALUA_DH_NAME);
+ return tpgs;
+ }
- h->tpgs = scsi_device_tpgs(sdev);
- switch (h->tpgs) {
+ tpgs = scsi_device_tpgs(sdev);
+ switch (tpgs) {
case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
sdev_printk(KERN_INFO, sdev,
"%s: supports implicit and explicit TPGS\n",
@@ -342,71 +301,38 @@ static int alua_check_tpgs(struct scsi_device *sdev, struct alua_dh_data *h)
sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
ALUA_DH_NAME);
break;
- default:
- h->tpgs = TPGS_MODE_NONE;
+ case TPGS_MODE_NONE:
sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
ALUA_DH_NAME);
- err = SCSI_DH_DEV_UNSUPP;
+ break;
+ default:
+ sdev_printk(KERN_INFO, sdev,
+ "%s: unsupported TPGS setting %d\n",
+ ALUA_DH_NAME, tpgs);
+ tpgs = TPGS_MODE_NONE;
break;
}
- return err;
+ return tpgs;
}
/*
- * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83
+ * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
* @sdev: device to be checked
*
* Extract the relative target port and the target port group
* descriptor from the list of identificators.
*/
-static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
+static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
+ int tpgs)
{
- int len;
- unsigned err;
- unsigned char *d;
-
- retry:
- err = submit_vpd_inquiry(sdev, h);
-
- if (err != SCSI_DH_OK)
- return err;
-
- /* Check if vpd page exceeds initial buffer */
- len = (h->buff[2] << 8) + h->buff[3] + 4;
- if (len > h->bufflen) {
- /* Resubmit with the correct length */
- if (realloc_buffer(h, len)) {
- sdev_printk(KERN_WARNING, sdev,
- "%s: kmalloc buffer failed\n",
- ALUA_DH_NAME);
- /* Temporary failure, bypass */
- return SCSI_DH_DEV_TEMP_BUSY;
- }
- goto retry;
- }
-
- /*
- * Now look for the correct descriptor.
- */
- d = h->buff + 4;
- while (d < h->buff + len) {
- switch (d[1] & 0xf) {
- case 0x4:
- /* Relative target port */
- h->rel_port = (d[6] << 8) + d[7];
- break;
- case 0x5:
- /* Target port group */
- h->group_id = (d[6] << 8) + d[7];
- break;
- default:
- break;
- }
- d += d[3] + 4;
- }
+ int rel_port = -1, group_id;
+ struct alua_port_group *pg, *old_pg = NULL;
+ bool pg_updated = false;
+ unsigned long flags;
- if (h->group_id == -1) {
+ group_id = scsi_vpd_tpg_id(sdev, &rel_port);
+ if (group_id < 0) {
/*
* Internal error; TPGS supported but required
* VPD identification descriptors not present.
@@ -415,70 +341,125 @@ static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
sdev_printk(KERN_INFO, sdev,
"%s: No target port descriptors found\n",
ALUA_DH_NAME);
- h->state = TPGS_STATE_OPTIMIZED;
- h->tpgs = TPGS_MODE_NONE;
- err = SCSI_DH_DEV_UNSUPP;
- } else {
+ return SCSI_DH_DEV_UNSUPP;
+ }
+
+ pg = alua_alloc_pg(sdev, group_id, tpgs);
+ if (IS_ERR(pg)) {
+ if (PTR_ERR(pg) == -ENOMEM)
+ return SCSI_DH_NOMEM;
+ return SCSI_DH_DEV_UNSUPP;
+ }
+ if (pg->device_id_len)
sdev_printk(KERN_INFO, sdev,
- "%s: port group %02x rel port %02x\n",
- ALUA_DH_NAME, h->group_id, h->rel_port);
+ "%s: device %s port group %x rel port %x\n",
+ ALUA_DH_NAME, pg->device_id_str,
+ group_id, rel_port);
+ else
+ sdev_printk(KERN_INFO, sdev,
+ "%s: port group %x rel port %x\n",
+ ALUA_DH_NAME, group_id, rel_port);
+
+ kref_get(&pg->kref);
+
+ /* Check for existing port group references */
+ spin_lock(&h->pg_lock);
+ old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
+ if (old_pg != pg) {
+ /* port group has changed. Update to new port group */
+ if (h->pg) {
+ spin_lock_irqsave(&old_pg->lock, flags);
+ list_del_rcu(&h->node);
+ spin_unlock_irqrestore(&old_pg->lock, flags);
+ }
+ rcu_assign_pointer(h->pg, pg);
+ pg_updated = true;
}
- return err;
+ spin_lock_irqsave(&pg->lock, flags);
+ if (pg_updated)
+ list_add_rcu(&h->node, &pg->dh_list);
+ spin_unlock_irqrestore(&pg->lock, flags);
+
+ spin_unlock(&h->pg_lock);
+
+ alua_rtpg_queue(pg, sdev, NULL, true);
+ kref_put(&pg->kref, release_port_group);
+
+ if (old_pg)
+ kref_put(&old_pg->kref, release_port_group);
+
+ return SCSI_DH_OK;
}
-static char print_alua_state(int state)
+static char print_alua_state(unsigned char state)
{
switch (state) {
- case TPGS_STATE_OPTIMIZED:
+ case SCSI_ACCESS_STATE_OPTIMAL:
return 'A';
- case TPGS_STATE_NONOPTIMIZED:
+ case SCSI_ACCESS_STATE_ACTIVE:
return 'N';
- case TPGS_STATE_STANDBY:
+ case SCSI_ACCESS_STATE_STANDBY:
return 'S';
- case TPGS_STATE_UNAVAILABLE:
+ case SCSI_ACCESS_STATE_UNAVAILABLE:
return 'U';
- case TPGS_STATE_LBA_DEPENDENT:
+ case SCSI_ACCESS_STATE_LBA:
return 'L';
- case TPGS_STATE_OFFLINE:
+ case SCSI_ACCESS_STATE_OFFLINE:
return 'O';
- case TPGS_STATE_TRANSITIONING:
+ case SCSI_ACCESS_STATE_TRANSITIONING:
return 'T';
default:
return 'X';
}
}
-static int alua_check_sense(struct scsi_device *sdev,
- struct scsi_sense_hdr *sense_hdr)
+static void alua_handle_state_transition(struct scsi_device *sdev)
+{
+ struct alua_dh_data *h = sdev->handler_data;
+ struct alua_port_group *pg;
+
+ rcu_read_lock();
+ pg = rcu_dereference(h->pg);
+ if (pg)
+ pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+ rcu_read_unlock();
+ alua_check(sdev, false);
+}
+
+static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
+ struct scsi_sense_hdr *sense_hdr)
{
switch (sense_hdr->sense_key) {
case NOT_READY:
- if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
+ if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
/*
* LUN Not Accessible - ALUA state transition
*/
- return ADD_TO_MLQUEUE;
- if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b)
- /*
- * LUN Not Accessible -- Target port in standby state
- */
- return SUCCESS;
- if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c)
+ alua_handle_state_transition(sdev);
+ return NEEDS_RETRY;
+ }
+ break;
+ case UNIT_ATTENTION:
+ if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
/*
- * LUN Not Accessible -- Target port in unavailable state
+ * LUN Not Accessible - ALUA state transition
*/
- return SUCCESS;
- if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12)
+ alua_handle_state_transition(sdev);
+ return NEEDS_RETRY;
+ }
+ if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
/*
- * LUN Not Ready -- Offline
+ * Power On, Reset, or Bus Device Reset.
+ * Might have obscured a state transition,
+ * so schedule a recheck.
*/
- return SUCCESS;
- break;
- case UNIT_ATTENTION:
- if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
+ alua_check(sdev, true);
+ return ADD_TO_MLQUEUE;
+ }
+ if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
/*
- * Power On, Reset, or Bus Device Reset, just retry.
+ * Device internal reset
*/
return ADD_TO_MLQUEUE;
if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
@@ -486,16 +467,20 @@ static int alua_check_sense(struct scsi_device *sdev,
* Mode Parameters Changed
*/
return ADD_TO_MLQUEUE;
- if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
+ if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
/*
* ALUA state changed
*/
+ alua_check(sdev, true);
return ADD_TO_MLQUEUE;
- if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
+ }
+ if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
/*
* Implicit ALUA state transition failed
*/
+ alua_check(sdev, true);
return ADD_TO_MLQUEUE;
+ }
if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
/*
* Inquiry data has changed
@@ -515,6 +500,31 @@ static int alua_check_sense(struct scsi_device *sdev,
}
/*
+ * alua_tur - Send a TEST UNIT READY
+ * @sdev: device to which the TEST UNIT READY command should be send
+ *
+ * Send a TEST UNIT READY to @sdev to figure out the device state
+ * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
+ * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
+ */
+static int alua_tur(struct scsi_device *sdev)
+{
+ struct scsi_sense_hdr sense_hdr;
+ int retval;
+
+ retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
+ ALUA_FAILOVER_RETRIES, &sense_hdr);
+ if ((sense_hdr.sense_key == NOT_READY ||
+ sense_hdr.sense_key == UNIT_ATTENTION) &&
+ sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
+ return SCSI_DH_RETRY;
+ else if (retval)
+ return SCSI_DH_IO;
+ else
+ return SCSI_DH_OK;
+}
+
+/*
* alua_rtpg - Evaluate REPORT TARGET GROUP STATES
* @sdev: the device to be evaluated.
*
@@ -522,30 +532,70 @@ static int alua_check_sense(struct scsi_device *sdev,
* Returns SCSI_DH_DEV_OFFLINED if the path is
* found to be unusable.
*/
-static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
+static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
{
struct scsi_sense_hdr sense_hdr;
- int len, k, off, valid_states = 0;
- unsigned char *ucp;
+ struct alua_port_group *tmp_pg;
+ int len, k, off, bufflen = ALUA_RTPG_SIZE;
+ int group_id_old, state_old, pref_old, valid_states_old;
+ unsigned char *desc, *buff;
unsigned err;
- bool rtpg_ext_hdr_req = 1;
- unsigned long expiry, interval = 0;
+ int retval;
unsigned int tpg_desc_tbl_off;
unsigned char orig_transition_tmo;
+ unsigned long flags;
+ bool transitioning_sense = false;
- if (!h->transition_tmo)
- expiry = round_jiffies_up(jiffies + ALUA_FAILOVER_TIMEOUT * HZ);
- else
- expiry = round_jiffies_up(jiffies + h->transition_tmo * HZ);
+ group_id_old = pg->group_id;
+ state_old = pg->state;
+ pref_old = pg->pref;
+ valid_states_old = pg->valid_states;
+
+ if (!pg->expiry) {
+ unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
+
+ if (pg->transition_tmo)
+ transition_tmo = pg->transition_tmo * HZ;
+
+ pg->expiry = round_jiffies_up(jiffies + transition_tmo);
+ }
+
+ buff = kzalloc(bufflen, GFP_KERNEL);
+ if (!buff)
+ return SCSI_DH_DEV_TEMP_BUSY;
retry:
- err = submit_rtpg(sdev, h, rtpg_ext_hdr_req);
+ err = 0;
+ retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
- if (err == SCSI_DH_IO && h->senselen > 0) {
- err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
- &sense_hdr);
- if (!err)
+ if (retval) {
+ /*
+ * Some (broken) implementations have a habit of returning
+ * an error during things like firmware update etc.
+ * But if the target only supports active/optimized there's
+ * not much we can do; it's not that we can switch paths
+ * or anything.
+ * So ignore any errors to avoid spurious failures during
+ * path failover.
+ */
+ if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
+ sdev_printk(KERN_INFO, sdev,
+ "%s: ignoring rtpg result %d\n",
+ ALUA_DH_NAME, retval);
+ kfree(buff);
+ return SCSI_DH_OK;
+ }
+ if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
+ sdev_printk(KERN_INFO, sdev,
+ "%s: rtpg failed, result %d\n",
+ ALUA_DH_NAME, retval);
+ kfree(buff);
+ if (retval < 0)
+ return SCSI_DH_DEV_TEMP_BUSY;
+ if (host_byte(retval) == DID_NO_CONNECT)
+ return SCSI_DH_RES_TEMP_UNAVAIL;
return SCSI_DH_IO;
+ }
/*
* submit_rtpg() has failed on existing arrays
@@ -554,107 +604,459 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
* even though it shouldn't according to T10.
* The retry without rtpg_ext_hdr_req set
* handles this.
+ * Note: some arrays return a sense key of ILLEGAL_REQUEST
+ * with ASC 00h if they don't support the extended header.
*/
- if (rtpg_ext_hdr_req == 1 &&
- sense_hdr.sense_key == ILLEGAL_REQUEST &&
- sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
- rtpg_ext_hdr_req = 0;
+ if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
+ sense_hdr.sense_key == ILLEGAL_REQUEST) {
+ pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
goto retry;
}
-
- err = alua_check_sense(sdev, &sense_hdr);
- if (err == ADD_TO_MLQUEUE && time_before(jiffies, expiry))
- goto retry;
- sdev_printk(KERN_INFO, sdev,
- "%s: rtpg sense code %02x/%02x/%02x\n",
- ALUA_DH_NAME, sense_hdr.sense_key,
- sense_hdr.asc, sense_hdr.ascq);
- err = SCSI_DH_IO;
+ /*
+ * If the array returns with 'ALUA state transition'
+ * sense code here it cannot return RTPG data during
+ * transition. So set the state to 'transitioning' directly.
+ */
+ if (sense_hdr.sense_key == NOT_READY &&
+ sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
+ transitioning_sense = true;
+ goto skip_rtpg;
+ }
+ /*
+ * Retry on any other UNIT ATTENTION occurred.
+ */
+ if (sense_hdr.sense_key == UNIT_ATTENTION)
+ err = SCSI_DH_RETRY;
+ if (err == SCSI_DH_RETRY &&
+ pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
+ sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
+ ALUA_DH_NAME);
+ scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
+ kfree(buff);
+ return err;
+ }
+ sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
+ ALUA_DH_NAME);
+ scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
+ kfree(buff);
+ pg->expiry = 0;
+ return SCSI_DH_IO;
}
- if (err != SCSI_DH_OK)
- return err;
- len = (h->buff[0] << 24) + (h->buff[1] << 16) +
- (h->buff[2] << 8) + h->buff[3] + 4;
+ len = get_unaligned_be32(&buff[0]) + 4;
- if (len > h->bufflen) {
+ if (len > bufflen) {
/* Resubmit with the correct length */
- if (realloc_buffer(h, len)) {
+ kfree(buff);
+ bufflen = len;
+ buff = kmalloc(bufflen, GFP_KERNEL);
+ if (!buff) {
sdev_printk(KERN_WARNING, sdev,
"%s: kmalloc buffer failed\n",__func__);
/* Temporary failure, bypass */
+ pg->expiry = 0;
return SCSI_DH_DEV_TEMP_BUSY;
}
goto retry;
}
- orig_transition_tmo = h->transition_tmo;
- if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && h->buff[5] != 0)
- h->transition_tmo = h->buff[5];
+ orig_transition_tmo = pg->transition_tmo;
+ if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
+ pg->transition_tmo = buff[5];
else
- h->transition_tmo = ALUA_FAILOVER_TIMEOUT;
+ pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
- if (orig_transition_tmo != h->transition_tmo) {
+ if (orig_transition_tmo != pg->transition_tmo) {
sdev_printk(KERN_INFO, sdev,
"%s: transition timeout set to %d seconds\n",
- ALUA_DH_NAME, h->transition_tmo);
- expiry = jiffies + h->transition_tmo * HZ;
+ ALUA_DH_NAME, pg->transition_tmo);
+ pg->expiry = jiffies + pg->transition_tmo * HZ;
}
- if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
+ if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
tpg_desc_tbl_off = 8;
else
tpg_desc_tbl_off = 4;
- for (k = tpg_desc_tbl_off, ucp = h->buff + tpg_desc_tbl_off;
+ for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
k < len;
- k += off, ucp += off) {
-
- if (h->group_id == (ucp[2] << 8) + ucp[3]) {
- h->state = ucp[0] & 0x0f;
- h->pref = ucp[0] >> 7;
- valid_states = ucp[1];
+ k += off, desc += off) {
+ u16 group_id = get_unaligned_be16(&desc[2]);
+
+ spin_lock_irqsave(&port_group_lock, flags);
+ tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
+ group_id);
+ spin_unlock_irqrestore(&port_group_lock, flags);
+ if (tmp_pg) {
+ if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
+ if ((tmp_pg == pg) ||
+ !(tmp_pg->flags & ALUA_PG_RUNNING)) {
+ struct alua_dh_data *h;
+
+ tmp_pg->state = desc[0] & 0x0f;
+ tmp_pg->pref = desc[0] >> 7;
+ rcu_read_lock();
+ list_for_each_entry_rcu(h,
+ &tmp_pg->dh_list, node) {
+ if (!h->sdev)
+ continue;
+ h->sdev->access_state = desc[0];
+ }
+ rcu_read_unlock();
+ }
+ if (tmp_pg == pg)
+ tmp_pg->valid_states = desc[1];
+ spin_unlock_irqrestore(&tmp_pg->lock, flags);
+ }
+ kref_put(&tmp_pg->kref, release_port_group);
}
- off = 8 + (ucp[7] * 4);
- }
-
- sdev_printk(KERN_INFO, sdev,
- "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
- ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
- h->pref ? "preferred" : "non-preferred",
- valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
- valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
- valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
- valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
- valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
- valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
- valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
-
- switch (h->state) {
- case TPGS_STATE_TRANSITIONING:
- if (time_before(jiffies, expiry)) {
+ off = 8 + (desc[7] * 4);
+ }
+
+ skip_rtpg:
+ spin_lock_irqsave(&pg->lock, flags);
+ if (transitioning_sense)
+ pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+
+ if (group_id_old != pg->group_id || state_old != pg->state ||
+ pref_old != pg->pref || valid_states_old != pg->valid_states)
+ sdev_printk(KERN_INFO, sdev,
+ "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
+ ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
+ pg->pref ? "preferred" : "non-preferred",
+ pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
+ pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
+ pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
+ pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
+ pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
+ pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
+ pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
+
+ switch (pg->state) {
+ case SCSI_ACCESS_STATE_TRANSITIONING:
+ if (time_before(jiffies, pg->expiry)) {
/* State transition, retry */
- interval += 2000;
- msleep(interval);
- goto retry;
+ pg->interval = ALUA_RTPG_RETRY_DELAY;
+ err = SCSI_DH_RETRY;
+ } else {
+ struct alua_dh_data *h;
+
+ /* Transitioning time exceeded, set port to standby */
+ err = SCSI_DH_IO;
+ pg->state = SCSI_ACCESS_STATE_STANDBY;
+ pg->expiry = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(h, &pg->dh_list, node) {
+ if (!h->sdev)
+ continue;
+ h->sdev->access_state =
+ (pg->state & SCSI_ACCESS_STATE_MASK);
+ if (pg->pref)
+ h->sdev->access_state |=
+ SCSI_ACCESS_STATE_PREFERRED;
+ }
+ rcu_read_unlock();
}
- /* Transitioning time exceeded, set port to standby */
- err = SCSI_DH_RETRY;
- h->state = TPGS_STATE_STANDBY;
break;
- case TPGS_STATE_OFFLINE:
+ case SCSI_ACCESS_STATE_OFFLINE:
/* Path unusable */
err = SCSI_DH_DEV_OFFLINED;
+ pg->expiry = 0;
break;
default:
/* Useable path if active */
err = SCSI_DH_OK;
+ pg->expiry = 0;
break;
}
+ spin_unlock_irqrestore(&pg->lock, flags);
+ kfree(buff);
return err;
}
/*
+ * alua_stpg - Issue a SET TARGET PORT GROUP command
+ *
+ * Issue a SET TARGET PORT GROUP command and evaluate the
+ * response. Returns SCSI_DH_RETRY per default to trigger
+ * a re-evaluation of the target group state or SCSI_DH_OK
+ * if no further action needs to be taken.
+ */
+static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
+{
+ int retval;
+ struct scsi_sense_hdr sense_hdr;
+
+ if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
+ /* Only implicit ALUA supported, retry */
+ return SCSI_DH_RETRY;
+ }
+ switch (pg->state) {
+ case SCSI_ACCESS_STATE_OPTIMAL:
+ return SCSI_DH_OK;
+ case SCSI_ACCESS_STATE_ACTIVE:
+ if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
+ !pg->pref &&
+ (pg->tpgs & TPGS_MODE_IMPLICIT))
+ return SCSI_DH_OK;
+ break;
+ case SCSI_ACCESS_STATE_STANDBY:
+ case SCSI_ACCESS_STATE_UNAVAILABLE:
+ break;
+ case SCSI_ACCESS_STATE_OFFLINE:
+ return SCSI_DH_IO;
+ case SCSI_ACCESS_STATE_TRANSITIONING:
+ break;
+ default:
+ sdev_printk(KERN_INFO, sdev,
+ "%s: stpg failed, unhandled TPGS state %d",
+ ALUA_DH_NAME, pg->state);
+ return SCSI_DH_NOSYS;
+ }
+ retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
+
+ if (retval) {
+ if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
+ sdev_printk(KERN_INFO, sdev,
+ "%s: stpg failed, result %d",
+ ALUA_DH_NAME, retval);
+ if (retval < 0)
+ return SCSI_DH_DEV_TEMP_BUSY;
+ } else {
+ sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
+ ALUA_DH_NAME);
+ scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
+ }
+ }
+ /* Retry RTPG */
+ return SCSI_DH_RETRY;
+}
+
+/*
+ * The caller must call scsi_device_put() on the returned pointer if it is not
+ * NULL.
+ */
+static struct scsi_device * __must_check
+alua_rtpg_select_sdev(struct alua_port_group *pg)
+{
+ struct alua_dh_data *h;
+ struct scsi_device *sdev = NULL, *prev_sdev;
+
+ lockdep_assert_held(&pg->lock);
+ if (WARN_ON(!pg->rtpg_sdev))
+ return NULL;
+
+ /*
+ * RCU protection isn't necessary for dh_list here
+ * as we hold pg->lock, but for access to h->pg.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(h, &pg->dh_list, node) {
+ if (!h->sdev)
+ continue;
+ if (h->sdev == pg->rtpg_sdev) {
+ h->disabled = true;
+ continue;
+ }
+ if (rcu_dereference(h->pg) == pg &&
+ !h->disabled &&
+ !scsi_device_get(h->sdev)) {
+ sdev = h->sdev;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!sdev) {
+ pr_warn("%s: no device found for rtpg\n",
+ (pg->device_id_len ?
+ (char *)pg->device_id_str : "(nameless PG)"));
+ return NULL;
+ }
+
+ sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n");
+
+ prev_sdev = pg->rtpg_sdev;
+ pg->rtpg_sdev = sdev;
+
+ return prev_sdev;
+}
+
+static void alua_rtpg_work(struct work_struct *work)
+{
+ struct alua_port_group *pg =
+ container_of(work, struct alua_port_group, rtpg_work.work);
+ struct scsi_device *sdev, *prev_sdev = NULL;
+ LIST_HEAD(qdata_list);
+ int err = SCSI_DH_OK;
+ struct alua_queue_data *qdata, *tmp;
+ struct alua_dh_data *h;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pg->lock, flags);
+ sdev = pg->rtpg_sdev;
+ if (!sdev) {
+ WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
+ WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
+ spin_unlock_irqrestore(&pg->lock, flags);
+ kref_put(&pg->kref, release_port_group);
+ return;
+ }
+ pg->flags |= ALUA_PG_RUNNING;
+ if (pg->flags & ALUA_PG_RUN_RTPG) {
+ int state = pg->state;
+
+ pg->flags &= ~ALUA_PG_RUN_RTPG;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
+ if (alua_tur(sdev) == SCSI_DH_RETRY) {
+ spin_lock_irqsave(&pg->lock, flags);
+ pg->flags &= ~ALUA_PG_RUNNING;
+ pg->flags |= ALUA_PG_RUN_RTPG;
+ if (!pg->interval)
+ pg->interval = ALUA_RTPG_RETRY_DELAY;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ queue_delayed_work(kaluad_wq, &pg->rtpg_work,
+ pg->interval * HZ);
+ return;
+ }
+ /* Send RTPG on failure or if TUR indicates SUCCESS */
+ }
+ err = alua_rtpg(sdev, pg);
+ spin_lock_irqsave(&pg->lock, flags);
+
+ /* If RTPG failed on the current device, try using another */
+ if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
+ (prev_sdev = alua_rtpg_select_sdev(pg)))
+ err = SCSI_DH_IMM_RETRY;
+
+ if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY ||
+ pg->flags & ALUA_PG_RUN_RTPG) {
+ pg->flags &= ~ALUA_PG_RUNNING;
+ if (err == SCSI_DH_IMM_RETRY)
+ pg->interval = 0;
+ else if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
+ pg->interval = ALUA_RTPG_RETRY_DELAY;
+ pg->flags |= ALUA_PG_RUN_RTPG;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ goto queue_rtpg;
+ }
+ if (err != SCSI_DH_OK)
+ pg->flags &= ~ALUA_PG_RUN_STPG;
+ }
+ if (pg->flags & ALUA_PG_RUN_STPG) {
+ pg->flags &= ~ALUA_PG_RUN_STPG;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ err = alua_stpg(sdev, pg);
+ spin_lock_irqsave(&pg->lock, flags);
+ if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
+ pg->flags |= ALUA_PG_RUN_RTPG;
+ pg->interval = 0;
+ pg->flags &= ~ALUA_PG_RUNNING;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ goto queue_rtpg;
+ }
+ }
+
+ list_splice_init(&pg->rtpg_list, &qdata_list);
+ /*
+ * We went through an RTPG, for good or bad.
+ * Re-enable all devices for the next attempt.
+ */
+ list_for_each_entry(h, &pg->dh_list, node)
+ h->disabled = false;
+ pg->rtpg_sdev = NULL;
+ spin_unlock_irqrestore(&pg->lock, flags);
+
+ if (prev_sdev)
+ scsi_device_put(prev_sdev);
+
+ list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
+ list_del(&qdata->entry);
+ if (qdata->callback_fn)
+ qdata->callback_fn(qdata->callback_data, err);
+ kfree(qdata);
+ }
+ spin_lock_irqsave(&pg->lock, flags);
+ pg->flags &= ~ALUA_PG_RUNNING;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ scsi_device_put(sdev);
+ kref_put(&pg->kref, release_port_group);
+ return;
+
+queue_rtpg:
+ if (prev_sdev)
+ scsi_device_put(prev_sdev);
+ queue_delayed_work(kaluad_wq, &pg->rtpg_work, pg->interval * HZ);
+}
+
+/**
+ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ * @pg: ALUA port group associated with @sdev.
+ * @sdev: SCSI device for which to submit an RTPG.
+ * @qdata: Information about the callback to invoke after the RTPG.
+ * @force: Whether or not to submit an RTPG if a work item that will submit an
+ * RTPG already has been scheduled.
+ *
+ * Returns true if and only if alua_rtpg_work() will be called asynchronously.
+ * That function is responsible for calling @qdata->fn().
+ *
+ * Context: may be called from atomic context (alua_check()) only if the caller
+ * holds an sdev reference.
+ */
+static bool alua_rtpg_queue(struct alua_port_group *pg,
+ struct scsi_device *sdev,
+ struct alua_queue_data *qdata, bool force)
+{
+ int start_queue = 0;
+ unsigned long flags;
+
+ if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
+ return false;
+
+ spin_lock_irqsave(&pg->lock, flags);
+ if (qdata) {
+ list_add_tail(&qdata->entry, &pg->rtpg_list);
+ pg->flags |= ALUA_PG_RUN_STPG;
+ force = true;
+ }
+ if (pg->rtpg_sdev == NULL) {
+ struct alua_dh_data *h = sdev->handler_data;
+
+ rcu_read_lock();
+ if (h && rcu_dereference(h->pg) == pg) {
+ pg->interval = 0;
+ pg->flags |= ALUA_PG_RUN_RTPG;
+ kref_get(&pg->kref);
+ pg->rtpg_sdev = sdev;
+ start_queue = 1;
+ }
+ rcu_read_unlock();
+ } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
+ pg->flags |= ALUA_PG_RUN_RTPG;
+ /* Do not queue if the worker is already running */
+ if (!(pg->flags & ALUA_PG_RUNNING)) {
+ kref_get(&pg->kref);
+ start_queue = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&pg->lock, flags);
+
+ if (start_queue) {
+ if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
+ msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
+ sdev = NULL;
+ else
+ kref_put(&pg->kref, release_port_group);
+ }
+ if (sdev)
+ scsi_device_put(sdev);
+
+ return true;
+}
+
+/*
* alua_initialize - Initialize ALUA state
* @sdev: the device to be initialized
*
@@ -663,21 +1065,15 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h)
*/
static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
{
- int err;
-
- err = alua_check_tpgs(sdev, h);
- if (err != SCSI_DH_OK)
- goto out;
-
- err = alua_vpd_inquiry(sdev, h);
- if (err != SCSI_DH_OK)
- goto out;
-
- err = alua_rtpg(sdev, h);
- if (err != SCSI_DH_OK)
- goto out;
-
-out:
+ int err = SCSI_DH_DEV_UNSUPP, tpgs;
+
+ mutex_lock(&h->init_mutex);
+ h->disabled = false;
+ tpgs = alua_check_tpgs(sdev);
+ if (tpgs != TPGS_MODE_NONE)
+ err = alua_check_vpd(sdev, h, tpgs);
+ h->init_error = err;
+ mutex_unlock(&h->init_mutex);
return err;
}
/*
@@ -691,10 +1087,12 @@ out:
*/
static int alua_set_params(struct scsi_device *sdev, const char *params)
{
- struct alua_dh_data *h = get_alua_data(sdev);
+ struct alua_dh_data *h = sdev->handler_data;
+ struct alua_port_group *pg = NULL;
unsigned int optimize = 0, argc;
const char *p = params;
int result = SCSI_DH_OK;
+ unsigned long flags;
if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
return -EINVAL;
@@ -704,18 +1102,23 @@ static int alua_set_params(struct scsi_device *sdev, const char *params)
if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
return -EINVAL;
+ rcu_read_lock();
+ pg = rcu_dereference(h->pg);
+ if (!pg) {
+ rcu_read_unlock();
+ return -ENXIO;
+ }
+ spin_lock_irqsave(&pg->lock, flags);
if (optimize)
- h->flags |= ALUA_OPTIMIZE_STPG;
+ pg->flags |= ALUA_OPTIMIZE_STPG;
else
- h->flags &= ~ALUA_OPTIMIZE_STPG;
+ pg->flags &= ~ALUA_OPTIMIZE_STPG;
+ spin_unlock_irqrestore(&pg->lock, flags);
+ rcu_read_unlock();
return result;
}
-static uint optimize_stpg;
-module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
-
/*
* alua_activate - activate a path
* @sdev: device on the path to be activated
@@ -729,50 +1132,39 @@ MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than
static int alua_activate(struct scsi_device *sdev,
activate_complete fn, void *data)
{
- struct alua_dh_data *h = get_alua_data(sdev);
+ struct alua_dh_data *h = sdev->handler_data;
int err = SCSI_DH_OK;
- int stpg = 0;
+ struct alua_queue_data *qdata;
+ struct alua_port_group *pg;
- err = alua_rtpg(sdev, h);
- if (err != SCSI_DH_OK)
+ qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
+ if (!qdata) {
+ err = SCSI_DH_RES_TEMP_UNAVAIL;
goto out;
-
- if (optimize_stpg)
- h->flags |= ALUA_OPTIMIZE_STPG;
-
- if (h->tpgs & TPGS_MODE_EXPLICIT) {
- switch (h->state) {
- case TPGS_STATE_NONOPTIMIZED:
- stpg = 1;
- if ((h->flags & ALUA_OPTIMIZE_STPG) &&
- (!h->pref) &&
- (h->tpgs & TPGS_MODE_IMPLICIT))
- stpg = 0;
- break;
- case TPGS_STATE_STANDBY:
- case TPGS_STATE_UNAVAILABLE:
- stpg = 1;
- break;
- case TPGS_STATE_OFFLINE:
- err = SCSI_DH_IO;
- break;
- case TPGS_STATE_TRANSITIONING:
- err = SCSI_DH_RETRY;
- break;
- default:
- break;
- }
}
-
- if (stpg) {
- h->callback_fn = fn;
- h->callback_data = data;
- err = submit_stpg(h);
- if (err == SCSI_DH_OK)
- return 0;
- h->callback_fn = h->callback_data = NULL;
+ qdata->callback_fn = fn;
+ qdata->callback_data = data;
+
+ mutex_lock(&h->init_mutex);
+ rcu_read_lock();
+ pg = rcu_dereference(h->pg);
+ if (!pg || !kref_get_unless_zero(&pg->kref)) {
+ rcu_read_unlock();
+ kfree(qdata);
+ err = h->init_error;
+ mutex_unlock(&h->init_mutex);
+ goto out;
}
+ rcu_read_unlock();
+ mutex_unlock(&h->init_mutex);
+ if (alua_rtpg_queue(pg, sdev, qdata, true)) {
+ fn = NULL;
+ } else {
+ kfree(qdata);
+ err = SCSI_DH_DEV_OFFLINED;
+ }
+ kref_put(&pg->kref, release_port_group);
out:
if (fn)
fn(data, err);
@@ -780,47 +1172,63 @@ out:
}
/*
+ * alua_check - check path status
+ * @sdev: device on the path to be checked
+ *
+ * Check the device status
+ */
+static void alua_check(struct scsi_device *sdev, bool force)
+{
+ struct alua_dh_data *h = sdev->handler_data;
+ struct alua_port_group *pg;
+
+ rcu_read_lock();
+ pg = rcu_dereference(h->pg);
+ if (!pg || !kref_get_unless_zero(&pg->kref)) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ alua_rtpg_queue(pg, sdev, NULL, force);
+ kref_put(&pg->kref, release_port_group);
+}
+
+/*
* alua_prep_fn - request callback
*
* Fail I/O to all paths not in state
* active/optimized or active/non-optimized.
*/
-static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
+static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
{
- struct alua_dh_data *h = get_alua_data(sdev);
- int ret = BLKPREP_OK;
+ struct alua_dh_data *h = sdev->handler_data;
+ struct alua_port_group *pg;
+ unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
- if (h->state == TPGS_STATE_TRANSITIONING)
- ret = BLKPREP_DEFER;
- else if (h->state != TPGS_STATE_OPTIMIZED &&
- h->state != TPGS_STATE_NONOPTIMIZED &&
- h->state != TPGS_STATE_LBA_DEPENDENT) {
- ret = BLKPREP_KILL;
- req->cmd_flags |= REQ_QUIET;
- }
- return ret;
+ rcu_read_lock();
+ pg = rcu_dereference(h->pg);
+ if (pg)
+ state = pg->state;
+ rcu_read_unlock();
+ switch (state) {
+ case SCSI_ACCESS_STATE_OPTIMAL:
+ case SCSI_ACCESS_STATE_ACTIVE:
+ case SCSI_ACCESS_STATE_LBA:
+ case SCSI_ACCESS_STATE_TRANSITIONING:
+ return BLK_STS_OK;
+ default:
+ req->rq_flags |= RQF_QUIET;
+ return BLK_STS_IOERR;
+ }
}
-static bool alua_match(struct scsi_device *sdev)
+static void alua_rescan(struct scsi_device *sdev)
{
- return (scsi_device_tpgs(sdev) != 0);
-}
-
-static int alua_bus_attach(struct scsi_device *sdev);
-static void alua_bus_detach(struct scsi_device *sdev);
+ struct alua_dh_data *h = sdev->handler_data;
-static struct scsi_device_handler alua_dh = {
- .name = ALUA_DH_NAME,
- .module = THIS_MODULE,
- .attach = alua_bus_attach,
- .detach = alua_bus_detach,
- .prep_fn = alua_prep_fn,
- .check_sense = alua_check_sense,
- .activate = alua_activate,
- .set_params = alua_set_params,
- .match = alua_match,
-};
+ alua_initialize(sdev, h);
+}
/*
* alua_bus_attach - Attach device handler
@@ -828,47 +1236,28 @@ static struct scsi_device_handler alua_dh = {
*/
static int alua_bus_attach(struct scsi_device *sdev)
{
- struct scsi_dh_data *scsi_dh_data;
struct alua_dh_data *h;
- unsigned long flags;
- int err = SCSI_DH_OK;
-
- scsi_dh_data = kzalloc(sizeof(*scsi_dh_data)
- + sizeof(*h) , GFP_KERNEL);
- if (!scsi_dh_data) {
- sdev_printk(KERN_ERR, sdev, "%s: Attach failed\n",
- ALUA_DH_NAME);
- return -ENOMEM;
- }
+ int err;
- scsi_dh_data->scsi_dh = &alua_dh;
- h = (struct alua_dh_data *) scsi_dh_data->buf;
- h->tpgs = TPGS_MODE_UNINITIALIZED;
- h->state = TPGS_STATE_OPTIMIZED;
- h->group_id = -1;
- h->rel_port = -1;
- h->buff = h->inq;
- h->bufflen = ALUA_INQUIRY_SIZE;
+ h = kzalloc(sizeof(*h) , GFP_KERNEL);
+ if (!h)
+ return SCSI_DH_NOMEM;
+ spin_lock_init(&h->pg_lock);
+ rcu_assign_pointer(h->pg, NULL);
+ h->init_error = SCSI_DH_OK;
h->sdev = sdev;
+ INIT_LIST_HEAD(&h->node);
+ mutex_init(&h->init_mutex);
err = alua_initialize(sdev, h);
- if ((err != SCSI_DH_OK) && (err != SCSI_DH_DEV_OFFLINED))
- goto failed;
-
- if (!try_module_get(THIS_MODULE))
+ if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
goto failed;
- spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
- sdev->scsi_dh_data = scsi_dh_data;
- spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
- sdev_printk(KERN_NOTICE, sdev, "%s: Attached\n", ALUA_DH_NAME);
-
- return 0;
-
+ sdev->handler_data = h;
+ return SCSI_DH_OK;
failed:
- kfree(scsi_dh_data);
- sdev_printk(KERN_ERR, sdev, "%s: not attached\n", ALUA_DH_NAME);
- return -EINVAL;
+ kfree(h);
+ return err;
}
/*
@@ -877,37 +1266,57 @@ failed:
*/
static void alua_bus_detach(struct scsi_device *sdev)
{
- struct scsi_dh_data *scsi_dh_data;
- struct alua_dh_data *h;
- unsigned long flags;
-
- spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
- scsi_dh_data = sdev->scsi_dh_data;
- sdev->scsi_dh_data = NULL;
- spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
-
- h = (struct alua_dh_data *) scsi_dh_data->buf;
- if (h->buff && h->inq != h->buff)
- kfree(h->buff);
- kfree(scsi_dh_data);
- module_put(THIS_MODULE);
- sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", ALUA_DH_NAME);
+ struct alua_dh_data *h = sdev->handler_data;
+ struct alua_port_group *pg;
+
+ spin_lock(&h->pg_lock);
+ pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
+ rcu_assign_pointer(h->pg, NULL);
+ spin_unlock(&h->pg_lock);
+ if (pg) {
+ spin_lock_irq(&pg->lock);
+ list_del_rcu(&h->node);
+ spin_unlock_irq(&pg->lock);
+ kref_put(&pg->kref, release_port_group);
+ }
+ sdev->handler_data = NULL;
+ synchronize_rcu();
+ kfree(h);
}
+static struct scsi_device_handler alua_dh = {
+ .name = ALUA_DH_NAME,
+ .module = THIS_MODULE,
+ .attach = alua_bus_attach,
+ .detach = alua_bus_detach,
+ .prep_fn = alua_prep_fn,
+ .check_sense = alua_check_sense,
+ .activate = alua_activate,
+ .rescan = alua_rescan,
+ .set_params = alua_set_params,
+};
+
static int __init alua_init(void)
{
int r;
+ kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
+ if (!kaluad_wq)
+ return -ENOMEM;
+
r = scsi_register_device_handler(&alua_dh);
- if (r != 0)
+ if (r != 0) {
printk(KERN_ERR "%s: Failed to register scsi device handler",
ALUA_DH_NAME);
+ destroy_workqueue(kaluad_wq);
+ }
return r;
}
static void __exit alua_exit(void)
{
scsi_unregister_device_handler(&alua_dh);
+ destroy_workqueue(kaluad_wq);
}
module_init(alua_init);