From be34c62ddf39d1931780b07a6f4241393e4ba2ee Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:22:19 +0200 Subject: scsi_transport_srp: Introduce srp_wait_for_queuecommand() Introduce the helper function srp_wait_for_queuecommand(). Move the definition of scsi_request_fn_active(). Add a comment above srp_wait_for_queuecommand() that support for scsi-mq needs to be added. This patch does not change any functionality. A second call to srp_wait_for_queuecommand() will be introduced in the next patch. Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Cc: #v3.13 Signed-off-by: Doug Ledford --- drivers/scsi/scsi_transport_srp.c | 54 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 23 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index ae45bd99baed..e05cd7e2d6d3 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -396,6 +396,36 @@ static void srp_reconnect_work(struct work_struct *work) } } +/** + * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. + * + * To do: add support for scsi-mq in this function. + */ +static int scsi_request_fn_active(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + struct request_queue *q; + int request_fn_active = 0; + + shost_for_each_device(sdev, shost) { + q = sdev->request_queue; + + spin_lock_irq(q->queue_lock); + request_fn_active += q->request_fn_active; + spin_unlock_irq(q->queue_lock); + } + + return request_fn_active; +} + +/* Wait until ongoing shost->hostt->queuecommand() calls have finished. */ +static void srp_wait_for_queuecommand(struct Scsi_Host *shost) +{ + while (scsi_request_fn_active(shost)) + msleep(20); +} + static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); @@ -503,27 +533,6 @@ void srp_start_tl_fail_timers(struct srp_rport *rport) } EXPORT_SYMBOL(srp_start_tl_fail_timers); -/** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @shost: SCSI host for which to count the number of scsi_request_fn() callers. - */ -static int scsi_request_fn_active(struct Scsi_Host *shost) -{ - struct scsi_device *sdev; - struct request_queue *q; - int request_fn_active = 0; - - shost_for_each_device(sdev, shost) { - q = sdev->request_queue; - - spin_lock_irq(q->queue_lock); - request_fn_active += q->request_fn_active; - spin_unlock_irq(q->queue_lock); - } - - return request_fn_active; -} - /** * srp_reconnect_rport() - reconnect to an SRP target port * @rport: SRP target port. @@ -559,8 +568,7 @@ int srp_reconnect_rport(struct srp_rport *rport) if (res) goto out; scsi_target_block(&shost->shost_gendev); - while (scsi_request_fn_active(shost)) - msleep(20); + srp_wait_for_queuecommand(shost); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); -- cgit From 535fb906225fb7436cb658144d0c0cea14a26f3e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:22:44 +0200 Subject: scsi_transport_srp: Fix a race condition Avoid that srp_terminate_io() can get invoked while srp_queuecommand() is in progress. This patch avoids that an I/O timeout can trigger the following kernel warning: WARNING: at drivers/infiniband/ulp/srp/ib_srp.c:1447 srp_terminate_io+0xef/0x100 [ib_srp]() Call Trace: [] dump_stack+0x4e/0x68 [] warn_slowpath_common+0x81/0xa0 [] warn_slowpath_null+0x1a/0x20 [] srp_terminate_io+0xef/0x100 [ib_srp] [] __rport_fail_io_fast+0xba/0xc0 [scsi_transport_srp] [] rport_fast_io_fail_timedout+0xe0/0xf0 [scsi_transport_srp] [] process_one_work+0x1db/0x780 [] worker_thread+0x11b/0x450 [] kthread+0xe4/0x100 [] ret_from_fork+0x7c/0xb0 See also patch "scsi_transport_srp: Add transport layer error handling" (commit ID 29c17324803c). Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Cc: #v3.13 Signed-off-by: Doug Ledford --- drivers/scsi/scsi_transport_srp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index e05cd7e2d6d3..f115f67a6ba5 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -439,8 +439,10 @@ static void __rport_fail_io_fast(struct srp_rport *rport) /* Involve the LLD if possible to terminate all I/O on the rport. */ i = to_srp_internal(shost->transportt); - if (i->f->terminate_rport_io) + if (i->f->terminate_rport_io) { + srp_wait_for_queuecommand(shost); i->f->terminate_rport_io(rport); + } } /** -- cgit From e68ca75200fed4700471fb84ea0d09e25f269654 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:24:38 +0200 Subject: scsi_transport_srp: Reduce failover time Unlike FC, there is no risk that SCSI devices will be offlined by the SCSI error handler if the SCSI error handler is started while a reconnect attempt is ongoing. Hence report timeout errors as soon as possible if a higher software layer (e.g. multipathd) needs to be informed about a timeout. It is assumed that if both fast_io_fail_tmo < 0 and rport->dev_loss_tmo < 0 that this means that there is no need to report timeouts quickly. Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford --- drivers/scsi/scsi_transport_srp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index f115f67a6ba5..a85292b1d09d 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -61,6 +61,11 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) return dev_to_shost(r->dev.parent); } +static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost) +{ + return transport_class_to_srp_rport(&shost->shost_gendev); +} + /** * srp_tmo_valid() - check timeout combination validity * @reconnect_delay: Reconnect delay in seconds. @@ -628,9 +633,11 @@ static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; struct srp_internal *i = to_srp_internal(shost->transportt); + struct srp_rport *rport = shost_to_rport(shost); pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev)); - return i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? + return rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 && + i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; } -- cgit From 985aa49556a576416d3f960f8fc1c6513863e33e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:27:14 +0200 Subject: IB/srp: Add 64-bit LUN support The SCSI standard defines 64-bit values for LUNs. Large arrays employing large or hierarchical LUN numbers become more and more common. So update the SRP initiator to use 64-bit LUN numbers. See also Hannes Reinecke, commit 9cb78c16f5da ("scsi: use 64-bit LUNs"), June 2014. The largest LUN number that has been tested is 0xd2003fff00000000. Checked the following structure sizes with gdb: * sizeof(struct srp_cmd) = 48 * sizeof(struct srp_tsk_mgmt) = 48 * sizeof(struct srp_aer_req) = 36 The ibmvscsi changes have been compile tested only (on a PPC system). Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Yann Droneaud Cc: Sebastian Parschauer Cc: Brian King Cc: Nathan Fontenot Cc: Tyrel Datwyler Signed-off-by: Doug Ledford --- drivers/scsi/ibmvscsi/ibmvscsi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index acea5d6eebd0..6a41c36b16b0 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1053,7 +1053,7 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd, memset(srp_cmd, 0x00, SRP_MAX_IU_LEN); srp_cmd->opcode = SRP_CMD; memcpy(srp_cmd->cdb, cmnd->cmnd, sizeof(srp_cmd->cdb)); - srp_cmd->lun = cpu_to_be64(((u64)lun) << 48); + int_to_scsilun(lun, &srp_cmd->lun); if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { if (!firmware_has_feature(FW_FEATURE_CMO)) @@ -1529,7 +1529,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) /* Set up an abort SRP command */ memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; tsk_mgmt->task_tag = (u64) found_evt; @@ -1652,7 +1652,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd) /* Set up a lun reset SRP command */ memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; evt->sync_srp = &srp_rsp; -- cgit