Diffstat (limited to 'drivers/scsi/scsi_error.c')
| -rw-r--r-- | drivers/scsi/scsi_error.c | 1584 |
1 file changed, 969 insertions(+), 615 deletions(-)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 21505962f539..f869108fd969 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * scsi_error.c Copyright (C) 1997 Eric Youngdale * @@ -33,9 +34,13 @@ #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> +#include <scsi/scsi_common.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> +#include <scsi/scsi_dh.h> +#include <scsi/scsi_devinfo.h> +#include <scsi/sg.h> #include "scsi_priv.h" #include "scsi_logging.h" @@ -43,7 +48,7 @@ #include <trace/events/scsi.h> -static void scsi_eh_done(struct scsi_cmnd *scmd); +#include <linux/unaligned.h> /* * These should *probably* be handled by the host itself. @@ -53,15 +58,18 @@ static void scsi_eh_done(struct scsi_cmnd *scmd); #define HOST_RESET_SETTLE_TIME (10) static int scsi_eh_try_stu(struct scsi_cmnd *scmd); +static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *, + struct scsi_cmnd *); -/* called with shost->host_lock held */ -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { - if (shost->host_busy == shost->host_failed) { + lockdep_assert_held(shost->host_lock); + + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); - SCSI_LOG_ERROR_RECOVERY(5, - printk("Waking error handler thread\n")); + SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, + "Waking error handler thread\n")); } } @@ -80,47 +88,242 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); } EXPORT_SYMBOL_GPL(scsi_schedule_eh); +static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) +{ + if (!shost->last_reset || shost->eh_deadline == -1) + return 0; + + /* + * 32bit accesses are guaranteed to be atomic + * (on all supported architectures), so instead + * of using a spinlock we can as well double check + * if eh_deadline has been set to 'off' during the + * time_before call. + */ + if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && + shost->eh_deadline > -1) + return 0; + + return 1; +} + +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd) +{ + if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT) + return true; + + return ++cmd->retries <= cmd->allowed; +} + +static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd) +{ + struct scsi_device *sdev = cmd->device; + struct Scsi_Host *host = sdev->host; + + if (host->hostt->eh_should_retry_cmd) + return host->hostt->eh_should_retry_cmd(cmd); + + return true; +} + /** - * scsi_eh_scmd_add - add scsi cmd to error handling. + * scmd_eh_abort_handler - Handle command aborts + * @work: command to be aborted. + * + * Note: this function must be called only for a command that has timed out. + * Because the block layer marks a request as complete before it calls + * scsi_timeout(), a .scsi_done() call from the LLD for a command that has + * timed out do not have any effect. Hence it is safe to call + * scsi_finish_command() from this function. 
+ */ +void +scmd_eh_abort_handler(struct work_struct *work) +{ + struct scsi_cmnd *scmd = + container_of(work, struct scsi_cmnd, abort_work.work); + struct scsi_device *sdev = scmd->device; + struct Scsi_Host *shost = sdev->host; + enum scsi_disposition rtn; + unsigned long flags; + + if (scsi_host_eh_past_deadline(shost)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "eh timeout, not aborting\n")); + goto out; + } + + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "aborting command\n")); + rtn = scsi_try_to_abort_cmd(shost->hostt, scmd); + if (rtn != SUCCESS) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "cmd abort %s\n", + (rtn == FAST_IO_FAIL) ? + "not send" : "failed")); + goto out; + } + set_host_byte(scmd, DID_TIME_OUT); + if (scsi_host_eh_past_deadline(shost)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "eh timeout, not retrying " + "aborted command\n")); + goto out; + } + + spin_lock_irqsave(shost->host_lock, flags); + list_del_init(&scmd->eh_entry); + + /* + * If the abort succeeds, and there is no further + * EH action, clear the ->last_reset time. + */ + if (list_empty(&shost->eh_abort_list) && + list_empty(&shost->eh_cmd_q)) + if (shost->eh_deadline != -1) + shost->last_reset = 0; + + spin_unlock_irqrestore(shost->host_lock, flags); + + if (!scsi_noretry_cmd(scmd) && + scsi_cmd_retry_allowed(scmd) && + scsi_eh_should_retry_cmd(scmd)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_WARNING, scmd, + "retry aborted command\n")); + scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + } else { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_WARNING, scmd, + "finish aborted command\n")); + scsi_finish_command(scmd); + } + return; + +out: + spin_lock_irqsave(shost->host_lock, flags); + list_del_init(&scmd->eh_entry); + spin_unlock_irqrestore(shost->host_lock, flags); + + scsi_eh_scmd_add(scmd); +} + +/** + * scsi_abort_command - schedule a command abort + * @scmd: scmd to abort. + * + * We only need to abort commands after a command timeout + */ +static int +scsi_abort_command(struct scsi_cmnd *scmd) +{ + struct scsi_device *sdev = scmd->device; + struct Scsi_Host *shost = sdev->host; + unsigned long flags; + + if (!shost->hostt->eh_abort_handler) { + /* No abort handler, fail command directly */ + return FAILED; + } + + if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { + /* + * Retry after abort failed, escalate to next level. + */ + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "previous abort failed\n")); + BUG_ON(delayed_work_pending(&scmd->abort_work)); + return FAILED; + } + + spin_lock_irqsave(shost->host_lock, flags); + if (shost->eh_deadline != -1 && !shost->last_reset) + shost->last_reset = jiffies; + BUG_ON(!list_empty(&scmd->eh_entry)); + list_add_tail(&scmd->eh_entry, &shost->eh_abort_list); + spin_unlock_irqrestore(shost->host_lock, flags); + + scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, "abort scheduled\n")); + queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100); + return SUCCESS; +} + +/** + * scsi_eh_reset - call into ->eh_action to reset internal counters * @scmd: scmd to run eh on. - * @eh_flag: optional SCSI_EH flag. * - * Return value: - * 0 on failure. + * The scsi driver might be carrying internal state about the + * devices, so we need to call into the driver to reset the + * internal state once the error handler is started. 
+ */ +static void scsi_eh_reset(struct scsi_cmnd *scmd) +{ + if (!blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) { + struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); + if (sdrv->eh_reset) + sdrv->eh_reset(scmd); + } +} + +static void scsi_eh_inc_host_failed(struct rcu_head *head) +{ + struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); + struct Scsi_Host *shost = scmd->device->host; + unsigned int busy = scsi_host_busy(shost); + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + shost->host_failed++; + scsi_eh_wakeup(shost, busy); + spin_unlock_irqrestore(shost->host_lock, flags); +} + +/** + * scsi_eh_scmd_add - add scsi cmd to error handling. + * @scmd: scmd to run eh on. */ -int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) +void scsi_eh_scmd_add(struct scsi_cmnd *scmd) { struct Scsi_Host *shost = scmd->device->host; unsigned long flags; - int ret = 0; + int ret; - if (!shost->ehandler) - return 0; + WARN_ON_ONCE(!shost->ehandler); + WARN_ON_ONCE(!test_bit(SCMD_STATE_INFLIGHT, &scmd->state)); spin_lock_irqsave(shost->host_lock, flags); - if (scsi_host_set_state(shost, SHOST_RECOVERY)) - if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) - goto out_unlock; + if (scsi_host_set_state(shost, SHOST_RECOVERY)) { + ret = scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY); + WARN_ON_ONCE(ret); + } + if (shost->eh_deadline != -1 && !shost->last_reset) + shost->last_reset = jiffies; - ret = 1; - scmd->eh_eflags |= eh_flag; + scsi_eh_reset(scmd); list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); - shost->host_failed++; - scsi_eh_wakeup(shost); - out_unlock: spin_unlock_irqrestore(shost->host_lock, flags); - return ret; + /* + * Ensure that all tasks observe the host state change before the + * host_failed change. + */ + call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed); } /** - * scsi_times_out - Timeout function for normal scsi commands. + * scsi_timeout - Timeout function for normal scsi commands. * @req: request that is timing out. * * Notes: @@ -129,27 +332,42 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) * normal completion function determines that the timer has already * fired, then it mustn't do anything. */ -enum blk_eh_timer_return scsi_times_out(struct request *req) +enum blk_eh_timer_return scsi_timeout(struct request *req) { - struct scsi_cmnd *scmd = req->special; - enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; + struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); struct Scsi_Host *host = scmd->device->host; trace_scsi_dispatch_cmd_timeout(scmd); scsi_log_completion(scmd, TIMEOUT_ERROR); - if (host->transportt->eh_timed_out) - rtn = host->transportt->eh_timed_out(scmd); - else if (host->hostt->eh_timed_out) - rtn = host->hostt->eh_timed_out(scmd); - - scmd->result |= DID_TIME_OUT << 16; + atomic_inc(&scmd->device->iotmo_cnt); + if (host->eh_deadline != -1 && !host->last_reset) + host->last_reset = jiffies; + + if (host->hostt->eh_timed_out) { + switch (host->hostt->eh_timed_out(scmd)) { + case SCSI_EH_DONE: + return BLK_EH_DONE; + case SCSI_EH_RESET_TIMER: + return BLK_EH_RESET_TIMER; + case SCSI_EH_NOT_HANDLED: + break; + } + } - if (unlikely(rtn == BLK_EH_NOT_HANDLED && - !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) - rtn = BLK_EH_HANDLED; + /* + * If scsi_done() has already set SCMD_STATE_COMPLETE, do not modify + * *scmd. 
+ */ + if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) + return BLK_EH_DONE; + atomic_inc(&scmd->device->iodone_cnt); + if (scsi_abort_command(scmd) != SUCCESS) { + set_host_byte(scmd, DID_TIME_OUT); + scsi_eh_scmd_add(scmd); + } - return rtn; + return BLK_EH_DONE; } /** @@ -171,9 +389,6 @@ int scsi_block_when_processing_errors(struct scsi_device *sdev) online = scsi_device_online(sdev); - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __func__, - online)); - return online; } EXPORT_SYMBOL(scsi_block_when_processing_errors); @@ -198,7 +413,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, list_for_each_entry(scmd, work_q, eh_entry) { if (scmd->device == sdev) { ++total_failures; - if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) + if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) ++cmd_cancel; else ++cmd_failed; @@ -207,7 +422,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, if (cmd_cancel || cmd_failed) { SCSI_LOG_ERROR_RECOVERY(3, - sdev_printk(KERN_INFO, sdev, + shost_printk(KERN_INFO, shost, "%s: cmds failed: %d, cancel: %d\n", __func__, cmd_failed, cmd_cancel)); @@ -217,52 +432,154 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, } } - SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d" - " devices require eh work\n", + SCSI_LOG_ERROR_RECOVERY(2, shost_printk(KERN_INFO, shost, + "Total of %d commands on %d" + " devices require eh work\n", total_failures, devices_failed)); } #endif + /** + * scsi_report_lun_change - Set flag on all *other* devices on the same target + * to indicate that a UNIT ATTENTION is expected. + * @sdev: Device reporting the UNIT ATTENTION + */ +static void scsi_report_lun_change(struct scsi_device *sdev) +{ + sdev->sdev_target->expecting_lun_change = 1; +} + +/** + * scsi_report_sense - Examine scsi sense information and log messages for + * certain conditions, also issue uevents for some of them. + * @sdev: Device reporting the sense code + * @sshdr: sshdr to be examined + */ +static void scsi_report_sense(struct scsi_device *sdev, + struct scsi_sense_hdr *sshdr) +{ + enum scsi_device_event evt_type = SDEV_EVT_MAXBITS; /* i.e. none */ + + if (sshdr->sense_key == UNIT_ATTENTION) { + if (sshdr->asc == 0x3f && sshdr->ascq == 0x03) { + evt_type = SDEV_EVT_INQUIRY_CHANGE_REPORTED; + sdev_printk(KERN_WARNING, sdev, + "Inquiry data has changed"); + } else if (sshdr->asc == 0x3f && sshdr->ascq == 0x0e) { + evt_type = SDEV_EVT_LUN_CHANGE_REPORTED; + scsi_report_lun_change(sdev); + sdev_printk(KERN_WARNING, sdev, + "LUN assignments on this target have " + "changed. The Linux SCSI layer does not " + "automatically remap LUN assignments.\n"); + } else if (sshdr->asc == 0x3f) + sdev_printk(KERN_WARNING, sdev, + "Operating parameters on this target have " + "changed. The Linux SCSI layer does not " + "automatically adjust these parameters.\n"); + + if (sshdr->asc == 0x38 && sshdr->ascq == 0x07) { + evt_type = SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED; + sdev_printk(KERN_WARNING, sdev, + "Warning! Received an indication that the " + "LUN reached a thin provisioning soft " + "threshold.\n"); + } + + if (sshdr->asc == 0x29) { + evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; + /* + * Do not print message if it is an expected side-effect + * of runtime PM. 
+ */ + if (!sdev->silence_suspend) + sdev_printk(KERN_WARNING, sdev, + "Power-on or device reset occurred\n"); + } + + if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { + evt_type = SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED; + sdev_printk(KERN_WARNING, sdev, + "Mode parameters changed"); + } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x06) { + evt_type = SDEV_EVT_ALUA_STATE_CHANGE_REPORTED; + sdev_printk(KERN_WARNING, sdev, + "Asymmetric access state changed"); + } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x09) { + evt_type = SDEV_EVT_CAPACITY_CHANGE_REPORTED; + sdev_printk(KERN_WARNING, sdev, + "Capacity data has changed"); + } else if (sshdr->asc == 0x2a) + sdev_printk(KERN_WARNING, sdev, + "Parameters changed"); + } + + if (evt_type != SDEV_EVT_MAXBITS) { + set_bit(evt_type, sdev->pending_events); + schedule_work(&sdev->event_work); + } +} + +static inline void set_scsi_ml_byte(struct scsi_cmnd *cmd, u8 status) +{ + cmd->result = (cmd->result & 0xffff00ff) | (status << 8); +} + /** * scsi_check_sense - Examine scsi cmd sense * @scmd: Cmd to have sense checked. * * Return value: - * SUCCESS or FAILED or NEEDS_RETRY or TARGET_ERROR + * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE * * Notes: * When a deferred error is detected the current command has * not been executed and needs retrying. */ -static int scsi_check_sense(struct scsi_cmnd *scmd) +enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) { + struct request *req = scsi_cmd_to_rq(scmd); struct scsi_device *sdev = scmd->device; struct scsi_sense_hdr sshdr; if (! scsi_command_normalize_sense(scmd, &sshdr)) return FAILED; /* no valid sense data */ - if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->scsi_done != scsi_eh_done) + scsi_report_sense(sdev, &sshdr); + + if (sshdr.sense_key == UNIT_ATTENTION) { /* - * nasty: for mid-layer issued TURs, we need to return the - * actual sense data without any recovery attempt. For eh - * issued ones, we need to try to recover and interpret + * Increment the counters for Power on/Reset or New Media so + * that all ULDs interested in these can see that those have + * happened, even if someone else gets the sense data. */ - return SUCCESS; + if (sshdr.asc == 0x28) + atomic_inc(&sdev->ua_new_media_ctr); + else if (sshdr.asc == 0x29) + atomic_inc(&sdev->ua_por_ctr); + } if (scsi_sense_is_deferred(&sshdr)) return NEEDS_RETRY; - if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh && - sdev->scsi_dh_data->scsi_dh->check_sense) { - int rc; + if (sdev->handler && sdev->handler->check_sense) { + enum scsi_disposition rc; - rc = sdev->scsi_dh_data->scsi_dh->check_sense(sdev, &sshdr); + rc = sdev->handler->check_sense(sdev, &sshdr); if (rc != SCSI_RETURN_NOT_HANDLED) return rc; /* handler does not care. Drop down to default handling */ } + if (scmd->cmnd[0] == TEST_UNIT_READY && + scmd->submitter != SUBMITTED_BY_SCSI_ERROR_HANDLER) + /* + * nasty: for mid-layer issued TURs, we need to return the + * actual sense data without any recovery attempt. For eh + * issued ones, we need to try to recover and interpret + */ + return SUCCESS; + /* * Previous logic looked for FILEMARK, EOM or ILI which are * mainly associated with tapes and returned SUCCESS. 
@@ -293,6 +610,28 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) if (sshdr.asc == 0x10) /* DIF */ return SUCCESS; + /* + * Check aborts due to command duration limit policy: + * ABORTED COMMAND additional sense code with the + * COMMAND TIMEOUT BEFORE PROCESSING or + * COMMAND TIMEOUT DURING PROCESSING or + * COMMAND TIMEOUT DURING PROCESSING DUE TO ERROR RECOVERY + * additional sense code qualifiers. + */ + if (sshdr.asc == 0x2e && + sshdr.ascq >= 0x01 && sshdr.ascq <= 0x03) { + set_scsi_ml_byte(scmd, SCSIML_STAT_DL_TIMEOUT); + req->cmd_flags |= REQ_FAILFAST_DEV; + req->rq_flags |= RQF_QUIET; + return SUCCESS; + } + + if (sshdr.asc == 0x44 && sdev->sdev_bflags & BLIST_RETRY_ITF) + return ADD_TO_MLQUEUE; + if (sshdr.asc == 0xc1 && sshdr.ascq == 0x01 && + sdev->sdev_bflags & BLIST_RETRY_ASC_C1) + return ADD_TO_MLQUEUE; + return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: @@ -315,10 +654,19 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) } } /* + * we might also expect a cc/ua if another LUN on the target + * reported a UA with an ASC/ASCQ of 3F 0E - + * REPORTED LUNS DATA HAS CHANGED. + */ + if (scmd->device->sdev_target->expecting_lun_change && + sshdr.asc == 0x3f && sshdr.ascq == 0x0e) + return NEEDS_RETRY; + /* * if the device is in the process of becoming ready, we * should retry. */ - if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) + if ((sshdr.asc == 0x04) && + (sshdr.ascq == 0x01 || sshdr.ascq == 0x0a)) return NEEDS_RETRY; /* * if the device is not started, we need to wake @@ -327,26 +675,6 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) if (scmd->device->allow_restart && (sshdr.asc == 0x04) && (sshdr.ascq == 0x02)) return FAILED; - - if (sshdr.asc == 0x3f && sshdr.ascq == 0x0e) - scmd_printk(KERN_WARNING, scmd, - "Warning! Received an indication that the " - "LUN assignments on this target have " - "changed. The Linux SCSI layer does not " - "automatically remap LUN assignments.\n"); - else if (sshdr.asc == 0x3f) - scmd_printk(KERN_WARNING, scmd, - "Warning! Received an indication that the " - "operating parameters on this target have " - "changed. The Linux SCSI layer does not " - "automatically adjust these parameters.\n"); - - if (sshdr.asc == 0x38 && sshdr.ascq == 0x07) - scmd_printk(KERN_WARNING, scmd, - "Warning! Received an indication that the " - "LUN reached a thin provisioning soft " - "threshold.\n"); - /* * Pass the UA upwards for a determination in the completion * functions. 
@@ -354,18 +682,26 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) return SUCCESS; /* these are not supported */ + case DATA_PROTECT: + if (sshdr.asc == 0x27 && sshdr.ascq == 0x07) { + /* Thin provisioning hard threshold reached */ + set_scsi_ml_byte(scmd, SCSIML_STAT_NOSPC); + return SUCCESS; + } + fallthrough; case COPY_ABORTED: case VOLUME_OVERFLOW: case MISCOMPARE: case BLANK_CHECK: - case DATA_PROTECT: - return TARGET_ERROR; + set_scsi_ml_byte(scmd, SCSIML_STAT_TGT_FAILURE); + return SUCCESS; case MEDIUM_ERROR: if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */ sshdr.asc == 0x13 || /* AMNF DATA FIELD */ sshdr.asc == 0x14) { /* RECORD NOT FOUND */ - return TARGET_ERROR; + set_scsi_ml_byte(scmd, SCSIML_STAT_MED_ERROR); + return SUCCESS; } return NEEDS_RETRY; @@ -373,14 +709,32 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) if (scmd->device->retry_hwerror) return ADD_TO_MLQUEUE; else - return TARGET_ERROR; + set_scsi_ml_byte(scmd, SCSIML_STAT_TGT_FAILURE); + fallthrough; case ILLEGAL_REQUEST: if (sshdr.asc == 0x20 || /* Invalid command operation code */ sshdr.asc == 0x21 || /* Logical block address out of range */ + sshdr.asc == 0x22 || /* Invalid function */ sshdr.asc == 0x24 || /* Invalid field in cdb */ - sshdr.asc == 0x26) { /* Parameter value invalid */ - return TARGET_ERROR; + sshdr.asc == 0x26 || /* Parameter value invalid */ + sshdr.asc == 0x27) { /* Write protected */ + set_scsi_ml_byte(scmd, SCSIML_STAT_TGT_FAILURE); + } + return SUCCESS; + + case COMPLETED: + /* + * A command using command duration limits (CDL) with a + * descriptor set with policy 0xD may be completed with success + * and the sense data DATA CURRENTLY UNAVAILABLE, indicating + * that the command was in fact aborted because it exceeded its + * duration limit. Never retry these commands. + */ + if (sshdr.asc == 0x55 && sshdr.ascq == 0x0a) { + set_scsi_ml_byte(scmd, SCSIML_STAT_DL_TIMEOUT); + req->cmd_flags |= REQ_FAILFAST_DEV; + req->rq_flags |= RQF_QUIET; } return SUCCESS; @@ -388,13 +742,17 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) return SUCCESS; } } +EXPORT_SYMBOL_GPL(scsi_check_sense); static void scsi_handle_queue_ramp_up(struct scsi_device *sdev) { - struct scsi_host_template *sht = sdev->host->hostt; + const struct scsi_host_template *sht = sdev->host->hostt; struct scsi_device *tmp_sdev; - if (!sht->change_queue_depth || + if (!sdev->budget_map.map) + return; + + if (!sht->track_queue_depth || sdev->queue_depth >= sdev->max_queue_depth) return; @@ -415,22 +773,18 @@ static void scsi_handle_queue_ramp_up(struct scsi_device *sdev) tmp_sdev->id != sdev->id || tmp_sdev->queue_depth == sdev->max_queue_depth) continue; - /* - * call back into LLD to increase queue_depth by one - * with ramp up reason code. - */ - sht->change_queue_depth(tmp_sdev, tmp_sdev->queue_depth + 1, - SCSI_QDEPTH_RAMP_UP); + + scsi_change_queue_depth(tmp_sdev, tmp_sdev->queue_depth + 1); sdev->last_queue_ramp_up = jiffies; } } static void scsi_handle_queue_full(struct scsi_device *sdev) { - struct scsi_host_template *sht = sdev->host->hostt; + const struct scsi_host_template *sht = sdev->host->hostt; struct scsi_device *tmp_sdev; - if (!sht->change_queue_depth) + if (!sht->track_queue_depth) return; shost_for_each_device(tmp_sdev, sdev->host) { @@ -442,8 +796,7 @@ static void scsi_handle_queue_full(struct scsi_device *sdev) * the device when we got the queue full so we start * from the highest possible value and work our way down. 
*/ - sht->change_queue_depth(tmp_sdev, tmp_sdev->queue_depth - 1, - SCSI_QDEPTH_QFULL); + scsi_track_queue_full(tmp_sdev, tmp_sdev->queue_depth - 1); } } @@ -457,7 +810,7 @@ static void scsi_handle_queue_full(struct scsi_device *sdev) * don't allow for the possibility of retries here, and we are a lot * more restrictive about what we consider acceptable. */ -static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) +static enum scsi_disposition scsi_eh_completed_normally(struct scsi_cmnd *scmd) { /* * first check the host byte, to see if there is anything in there @@ -476,40 +829,43 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) return FAILED; /* - * next, check the message byte. - */ - if (msg_byte(scmd->result) != COMMAND_COMPLETE) - return FAILED; - - /* * now, check the status byte to see if this indicates * anything special. */ - switch (status_byte(scmd->result)) { - case GOOD: + switch (get_status_byte(scmd)) { + case SAM_STAT_GOOD: scsi_handle_queue_ramp_up(scmd->device); - case COMMAND_TERMINATED: + if (scmd->sense_buffer && SCSI_SENSE_VALID(scmd)) + /* + * If we have sense data, call scsi_check_sense() in + * order to set the correct SCSI ML byte (if any). + * No point in checking the return value, since the + * command has already completed successfully. + */ + scsi_check_sense(scmd); + fallthrough; + case SAM_STAT_COMMAND_TERMINATED: return SUCCESS; - case CHECK_CONDITION: + case SAM_STAT_CHECK_CONDITION: return scsi_check_sense(scmd); - case CONDITION_GOOD: - case INTERMEDIATE_GOOD: - case INTERMEDIATE_C_GOOD: + case SAM_STAT_CONDITION_MET: + case SAM_STAT_INTERMEDIATE: + case SAM_STAT_INTERMEDIATE_CONDITION_MET: /* * who knows? FIXME(eric) */ return SUCCESS; - case RESERVATION_CONFLICT: + case SAM_STAT_RESERVATION_CONFLICT: if (scmd->cmnd[0] == TEST_UNIT_READY) /* it is a success, we probed the device and * found it */ return SUCCESS; /* otherwise, we failed to send the command */ return FAILED; - case QUEUE_FULL: + case SAM_STAT_TASK_SET_FULL: scsi_handle_queue_full(scmd->device); - /* fall through */ - case BUSY: + fallthrough; + case SAM_STAT_BUSY: return NEEDS_RETRY; default: return FAILED; @@ -521,13 +877,12 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) * scsi_eh_done - Completion function for error handling. * @scmd: Cmd that is done. */ -static void scsi_eh_done(struct scsi_cmnd *scmd) +void scsi_eh_done(struct scsi_cmnd *scmd) { struct completion *eh_action; - SCSI_LOG_ERROR_RECOVERY(3, - printk("%s scmd: %p result: %x\n", - __func__, scmd, scmd->result)); + SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, + "%s result: %x\n", __func__, scmd->result)); eh_action = scmd->device->host->eh_action; if (eh_action) @@ -538,15 +893,15 @@ static void scsi_eh_done(struct scsi_cmnd *scmd) * scsi_try_host_reset - ask host adapter to reset itself * @scmd: SCSI cmd to send host reset. 
*/ -static int scsi_try_host_reset(struct scsi_cmnd *scmd) +static enum scsi_disposition scsi_try_host_reset(struct scsi_cmnd *scmd) { unsigned long flags; - int rtn; + enum scsi_disposition rtn; struct Scsi_Host *host = scmd->device->host; - struct scsi_host_template *hostt = host->hostt; + const struct scsi_host_template *hostt = host->hostt; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", - __func__)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, host, "Snd Host RST\n")); if (!hostt->eh_host_reset_handler) return FAILED; @@ -568,15 +923,15 @@ static int scsi_try_host_reset(struct scsi_cmnd *scmd) * scsi_try_bus_reset - ask host to perform a bus reset * @scmd: SCSI cmd to send bus reset. */ -static int scsi_try_bus_reset(struct scsi_cmnd *scmd) +static enum scsi_disposition scsi_try_bus_reset(struct scsi_cmnd *scmd) { unsigned long flags; - int rtn; + enum scsi_disposition rtn; struct Scsi_Host *host = scmd->device->host; - struct scsi_host_template *hostt = host->hostt; + const struct scsi_host_template *hostt = host->hostt; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n", - __func__)); + SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, + "%s: Snd Bus RST\n", __func__)); if (!hostt->eh_bus_reset_handler) return FAILED; @@ -610,12 +965,12 @@ static void __scsi_report_device_reset(struct scsi_device *sdev, void *data) * timer on it, and set the host back to a consistent state prior to * returning. */ -static int scsi_try_target_reset(struct scsi_cmnd *scmd) +static enum scsi_disposition scsi_try_target_reset(struct scsi_cmnd *scmd) { unsigned long flags; - int rtn; + enum scsi_disposition rtn; struct Scsi_Host *host = scmd->device->host; - struct scsi_host_template *hostt = host->hostt; + const struct scsi_host_template *hostt = host->hostt; if (!hostt->eh_target_reset_handler) return FAILED; @@ -641,10 +996,10 @@ static int scsi_try_target_reset(struct scsi_cmnd *scmd) * timer on it, and set the host back to a consistent state prior to * returning. */ -static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) +static enum scsi_disposition scsi_try_bus_device_reset(struct scsi_cmnd *scmd) { - int rtn; - struct scsi_host_template *hostt = scmd->device->host->hostt; + enum scsi_disposition rtn; + const struct scsi_host_template *hostt = scmd->device->host->hostt; if (!hostt->eh_device_reset_handler) return FAILED; @@ -655,7 +1010,25 @@ static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) return rtn; } -static int scsi_try_to_abort_cmd(struct scsi_host_template *hostt, struct scsi_cmnd *scmd) +/** + * scsi_try_to_abort_cmd - Ask host to abort a SCSI command + * @hostt: SCSI driver host template + * @scmd: SCSI cmd used to send a target reset + * + * Return value: + * SUCCESS, FAILED, or FAST_IO_FAIL + * + * Notes: + * SUCCESS does not necessarily indicate that the command + * has been aborted; it only indicates that the LLDDs + * has cleared all references to that command. + * LLDDs should return FAILED only if an abort was required + * but could not be executed. LLDDs should return FAST_IO_FAIL + * if the device is temporarily unavailable (eg due to a + * link down on FibreChannel) + */ +static enum scsi_disposition +scsi_try_to_abort_cmd(const struct scsi_host_template *hostt, struct scsi_cmnd *scmd) { if (!hostt->eh_abort_handler) return FAILED; @@ -677,7 +1050,7 @@ static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd) * @scmd: SCSI command structure to hijack * @ses: structure to save restore information * @cmnd: CDB to send. 
Can be NULL if no new cmnd is needed - * @cmnd_size: size in bytes of @cmnd (must be <= BLK_MAX_CDB) + * @cmnd_size: size in bytes of @cmnd (must be <= MAX_COMMAND_SIZE) * @sense_bytes: size of sense data to copy. or 0 (if != 0 @cmnd is ignored) * * This function is used to save a scsi command information before re-execution @@ -699,19 +1072,21 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, * command. */ ses->cmd_len = scmd->cmd_len; - ses->cmnd = scmd->cmnd; ses->data_direction = scmd->sc_data_direction; ses->sdb = scmd->sdb; - ses->next_rq = scmd->request->next_rq; ses->result = scmd->result; + ses->resid_len = scmd->resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; + ses->eh_eflags = scmd->eh_eflags; scmd->prot_op = SCSI_PROT_NORMAL; - scmd->cmnd = ses->eh_cmnd; - memset(scmd->cmnd, 0, BLK_MAX_CDB); + scmd->eh_eflags = 0; + memcpy(ses->cmnd, scmd->cmnd, sizeof(ses->cmnd)); + memset(scmd->cmnd, 0, sizeof(scmd->cmnd)); memset(&scmd->sdb, 0, sizeof(scmd->sdb)); - scmd->request->next_rq = NULL; + scmd->result = 0; + scmd->resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -720,14 +1095,14 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, scmd->sdb.length); scmd->sdb.table.sgl = &ses->sense_sgl; scmd->sc_data_direction = DMA_FROM_DEVICE; - scmd->sdb.table.nents = 1; + scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1; scmd->cmnd[0] = REQUEST_SENSE; scmd->cmnd[4] = scmd->sdb.length; scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); } else { scmd->sc_data_direction = DMA_NONE; if (cmnd) { - BUG_ON(cmnd_size > BLK_MAX_CDB); + BUG_ON(cmnd_size > sizeof(scmd->cmnd)); memcpy(scmd->cmnd, cmnd, cmnd_size); scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); } @@ -760,13 +1135,14 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) * Restore original data */ scmd->cmd_len = ses->cmd_len; - scmd->cmnd = ses->cmnd; + memcpy(scmd->cmnd, ses->cmnd, sizeof(ses->cmnd)); scmd->sc_data_direction = ses->data_direction; scmd->sdb = ses->sdb; - scmd->request->next_rq = ses->next_rq; scmd->result = ses->result; + scmd->resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; + scmd->eh_eflags = ses->eh_eflags; } EXPORT_SYMBOL(scsi_eh_restore_cmnd); @@ -784,13 +1160,13 @@ EXPORT_SYMBOL(scsi_eh_restore_cmnd); * Return value: * SUCCESS or FAILED or NEEDS_RETRY */ -static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd, - int cmnd_size, int timeout, unsigned sense_bytes) +static enum scsi_disposition scsi_send_eh_cmnd(struct scsi_cmnd *scmd, + unsigned char *cmnd, int cmnd_size, int timeout, unsigned sense_bytes) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; DECLARE_COMPLETION_ONSTACK(done); - unsigned long timeleft = timeout; + unsigned long timeleft = timeout, delay; struct scsi_eh_save ses; const unsigned long stall_for = msecs_to_jiffies(100); int rtn; @@ -800,29 +1176,54 @@ retry: shost->eh_action = &done; scsi_log_send(scmd); - scmd->scsi_done = scsi_eh_done; - rtn = shost->hostt->queuecommand(shost, scmd); + scmd->submitter = SUBMITTED_BY_SCSI_ERROR_HANDLER; + scmd->flags |= SCMD_LAST; + + /* + * Lock sdev->state_mutex to avoid that scsi_device_quiesce() can + * change the SCSI device state after we have examined it and before + * .queuecommand() is called. 
+ */ + mutex_lock(&sdev->state_mutex); + while (sdev->sdev_state == SDEV_BLOCK && timeleft > 0) { + mutex_unlock(&sdev->state_mutex); + SCSI_LOG_ERROR_RECOVERY(5, sdev_printk(KERN_DEBUG, sdev, + "%s: state %d <> %d\n", __func__, sdev->sdev_state, + SDEV_BLOCK)); + delay = min(timeleft, stall_for); + timeleft -= delay; + msleep(jiffies_to_msecs(delay)); + mutex_lock(&sdev->state_mutex); + } + if (sdev->sdev_state != SDEV_BLOCK) + rtn = shost->hostt->queuecommand(shost, scmd); + else + rtn = FAILED; + mutex_unlock(&sdev->state_mutex); + if (rtn) { if (timeleft > stall_for) { scsi_eh_restore_cmnd(scmd, &ses); + timeleft -= stall_for; msleep(jiffies_to_msecs(stall_for)); goto retry; } /* signal not to enter either branch of the if () below */ timeleft = 0; - rtn = NEEDS_RETRY; + rtn = FAILED; } else { timeleft = wait_for_completion_timeout(&done, timeout); + rtn = SUCCESS; } shost->eh_action = NULL; scsi_log_completion(scmd, rtn); - SCSI_LOG_ERROR_RECOVERY(3, - printk("%s: scmd: %p, timeleft: %ld\n", - __func__, scmd, timeleft)); + SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, + "%s timeleft: %ld\n", + __func__, timeleft)); /* * If there is time left scsi_eh_done got called, and we will examine @@ -835,15 +1236,13 @@ retry: */ if (timeleft) { rtn = scsi_eh_completed_normally(scmd); - SCSI_LOG_ERROR_RECOVERY(3, - printk("%s: scsi_eh_completed_normally %x\n", - __func__, rtn)); + SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, + "%s: scsi_eh_completed_normally %x\n", __func__, rtn)); switch (rtn) { case SUCCESS: case NEEDS_RETRY: case FAILED: - case TARGET_ERROR: break; case ADD_TO_MLQUEUE: rtn = NEEDS_RETRY; @@ -852,19 +1251,13 @@ retry: rtn = FAILED; break; } - } else if (!rtn) { + } else if (rtn != FAILED) { scsi_abort_eh_cmnd(scmd); rtn = FAILED; } scsi_eh_restore_cmnd(scmd, &ses); - if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { - struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); - if (sdrv->eh_action) - rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn); - } - return rtn; } @@ -877,11 +1270,22 @@ retry: * that we obtain it on our own. This function will *not* return until * the command either times out, or it completes. */ -static int scsi_request_sense(struct scsi_cmnd *scmd) +static enum scsi_disposition scsi_request_sense(struct scsi_cmnd *scmd) { return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0); } +static enum scsi_disposition +scsi_eh_action(struct scsi_cmnd *scmd, enum scsi_disposition rtn) +{ + if (!blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) { + struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); + if (sdrv->eh_action) + rtn = sdrv->eh_action(scmd, rtn); + } + return rtn; +} + /** * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. * @scmd: Original SCSI cmd that eh has finished. @@ -896,8 +1300,6 @@ static int scsi_request_sense(struct scsi_cmnd *scmd) */ void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) { - scmd->device->host->host_failed--; - scmd->eh_eflags = 0; list_move_tail(&scmd->eh_entry, done_q); } EXPORT_SYMBOL(scsi_eh_finish_cmd); @@ -926,13 +1328,35 @@ int scsi_eh_get_sense(struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; - int rtn; + struct Scsi_Host *shost; + enum scsi_disposition rtn; + /* + * If SCSI_EH_ABORT_SCHEDULED has been set, it is timeout IO, + * should not get sense. 
+ */ list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || + if ((scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) || SCSI_SENSE_VALID(scmd)) continue; + shost = scmd->device->host; + if (scsi_host_eh_past_deadline(shost)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "%s: skip request sense, past eh deadline\n", + current->comm)); + break; + } + if (!scsi_status_is_check_condition(scmd->result)) + /* + * don't request sense if there's no check condition + * status because the error we're processing isn't one + * that has a sense code (and some devices get + * confused by sense requests out of the blue) + */ + continue; + SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, "%s: requesting sense\n", current->comm)); @@ -940,10 +1364,9 @@ int scsi_eh_get_sense(struct list_head *work_q, if (rtn != SUCCESS) continue; - SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p" - " result %x\n", scmd, - scmd->result)); - SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd)); + SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, + "sense requested, result %x\n", scmd->result)); + SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense(scmd)); rtn = scsi_decide_disposition(scmd); @@ -952,11 +1375,18 @@ int scsi_eh_get_sense(struct list_head *work_q, * upper level. */ if (rtn == SUCCESS) - /* we don't want this command reissued, just - * finished with the sense data, so set - * retries to the max allowed to ensure it - * won't get reissued */ - scmd->retries = scmd->allowed; + /* + * We don't want this command reissued, just finished + * with the sense data, so set retries to the max + * allowed to ensure it won't get reissued. If the user + * has requested infinite retries, we also want to + * finish this command, so force completion by setting + * retries and allowed to the same value. + */ + if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT) + scmd->retries = scmd->allowed = 1; + else + scmd->retries = scmd->allowed; else if (rtn != NEEDS_RETRY) continue; @@ -977,20 +1407,21 @@ EXPORT_SYMBOL_GPL(scsi_eh_get_sense); static int scsi_eh_tur(struct scsi_cmnd *scmd) { static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; - int retry_cnt = 1, rtn; + int retry_cnt = 1; + enum scsi_disposition rtn; retry_tur: rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, scmd->device->eh_timeout, 0); - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", - __func__, scmd, rtn)); + SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, + "%s return: %x\n", __func__, rtn)); switch (rtn) { case NEEDS_RETRY: if (retry_cnt--) goto retry_tur; - /*FALLTHRU*/ + fallthrough; case SUCCESS: return 0; default: @@ -1001,9 +1432,9 @@ retry_tur: /** * scsi_eh_test_devices - check if devices are responding from error recovery. * @cmd_list: scsi commands in error recovery. - * @work_q: queue for commands which still need more error recovery - * @done_q: queue for commands which are finished - * @try_stu: boolean on if a STU command should be tried in addition to TUR. + * @work_q: queue for commands which still need more error recovery + * @done_q: queue for commands which are finished + * @try_stu: boolean on if a STU command should be tried in addition to TUR. * * Decription: * Tests if devices are in a working state. 
Commands to devices now in @@ -1023,6 +1454,18 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); sdev = scmd->device; + if (!try_stu) { + if (scsi_host_eh_past_deadline(sdev->host)) { + /* Push items back onto work_q */ + list_splice_init(cmd_list, work_q); + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "%s: skip test device, past eh deadline", + current->comm)); + break; + } + } + finish_cmds = !scsi_device_online(scmd->device) || (try_stu && !scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) || @@ -1030,7 +1473,9 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, list_for_each_entry_safe(scmd, next, cmd_list, eh_entry) if (scmd->device == sdev) { - if (finish_cmds) + if (finish_cmds && + (try_stu || + scsi_eh_action(scmd, SUCCESS) == SUCCESS)) scsi_eh_finish_cmd(scmd, done_q); else list_move_tail(&scmd->eh_entry, work_q); @@ -1039,50 +1484,6 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, return list_empty(work_q); } - -/** - * scsi_eh_abort_cmds - abort pending commands. - * @work_q: &list_head for pending commands. - * @done_q: &list_head for processed commands. - * - * Decription: - * Try and see whether or not it makes sense to try and abort the - * running command. This only works out to be the case if we have one - * command that has timed out. If the command simply failed, it makes - * no sense to try and abort the command, since as far as the shost - * adapter is concerned, it isn't running. - */ -static int scsi_eh_abort_cmds(struct list_head *work_q, - struct list_head *done_q) -{ - struct scsi_cmnd *scmd, *next; - LIST_HEAD(check_list); - int rtn; - - list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD)) - continue; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:" - "0x%p\n", current->comm, - scmd)); - rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd); - if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { - scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; - if (rtn == FAST_IO_FAIL) - scsi_eh_finish_cmd(scmd, done_q); - else - list_move_tail(&scmd->eh_entry, &check_list); - } else - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting" - " cmd failed:" - "0x%p\n", - current->comm, - scmd)); - } - - return scsi_eh_test_devices(&check_list, work_q, done_q, 0); -} - /** * scsi_eh_try_stu - Send START_UNIT to device. * @scmd: &scsi_cmnd to send START_UNIT @@ -1095,10 +1496,12 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd) static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0}; if (scmd->device->allow_restart) { - int i, rtn = NEEDS_RETRY; + int i; + enum scsi_disposition rtn = NEEDS_RETRY; for (i = 0; rtn == NEEDS_RETRY && i < 2; i++) - rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, scmd->device->request_queue->rq_timeout, 0); + rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, + scmd->device->eh_timeout, 0); if (rtn == SUCCESS) return 0; @@ -1110,7 +1513,7 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd) /** * scsi_eh_stu - send START_UNIT if needed * @shost: &scsi host being recovered. - * @work_q: &list_head for pending commands. + * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. 
* * Notes: @@ -1125,6 +1528,14 @@ static int scsi_eh_stu(struct Scsi_Host *shost, struct scsi_device *sdev; shost_for_each_device(sdev, shost) { + if (scsi_host_eh_past_deadline(shost)) { + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "%s: skip START_UNIT, past eh deadline\n", + current->comm)); + scsi_device_put(sdev); + break; + } stu_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && @@ -1136,22 +1547,26 @@ static int scsi_eh_stu(struct Scsi_Host *shost, if (!stu_scmd) continue; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:" - " 0x%p\n", current->comm, sdev)); + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "%s: Sending START_UNIT\n", + current->comm)); if (!scsi_eh_try_stu(stu_scmd)) { if (!scsi_device_online(sdev) || !scsi_eh_tur(stu_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (scmd->device == sdev) + if (scmd->device == sdev && + scsi_eh_action(scmd, SUCCESS) == SUCCESS) scsi_eh_finish_cmd(scmd, done_q); } } } else { SCSI_LOG_ERROR_RECOVERY(3, - printk("%s: START_UNIT failed to sdev:" - " 0x%p\n", current->comm, sdev)); + sdev_printk(KERN_INFO, sdev, + "%s: START_UNIT failed\n", + current->comm)); } } @@ -1162,7 +1577,7 @@ static int scsi_eh_stu(struct Scsi_Host *shost, /** * scsi_eh_bus_device_reset - send bdr if needed * @shost: scsi host being recovered. - * @work_q: &list_head for pending commands. + * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. * * Notes: @@ -1177,9 +1592,17 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, { struct scsi_cmnd *scmd, *bdr_scmd, *next; struct scsi_device *sdev; - int rtn; + enum scsi_disposition rtn; shost_for_each_device(sdev, shost) { + if (scsi_host_eh_past_deadline(shost)) { + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "%s: skip BDR, past eh deadline\n", + current->comm)); + scsi_device_put(sdev); + break; + } bdr_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) if (scmd->device == sdev) { @@ -1190,9 +1613,9 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, if (!bdr_scmd) continue; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:" - " 0x%p\n", current->comm, - sdev)); + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "%s: Sending BDR\n", current->comm)); rtn = scsi_try_bus_device_reset(bdr_scmd); if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { if (!scsi_device_online(sdev) || @@ -1200,17 +1623,16 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, !scsi_eh_tur(bdr_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { - if (scmd->device == sdev) + if (scmd->device == sdev && + scsi_eh_action(scmd, rtn) != FAILED) scsi_eh_finish_cmd(scmd, done_q); } } } else { - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR" - " failed sdev:" - "0x%p\n", - current->comm, - sdev)); + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "%s: BDR failed\n", current->comm)); } } @@ -1220,7 +1642,7 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, /** * scsi_eh_target_reset - send target reset if needed * @shost: scsi host being recovered. - * @work_q: &list_head for pending commands. + * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. 
* * Notes: @@ -1237,21 +1659,34 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost, while (!list_empty(&tmp_list)) { struct scsi_cmnd *next, *scmd; - int rtn; + enum scsi_disposition rtn; unsigned int id; + if (scsi_host_eh_past_deadline(shost)) { + /* push back on work queue for further processing */ + list_splice_init(&check_list, work_q); + list_splice_init(&tmp_list, work_q); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: Skip target reset, past eh deadline\n", + current->comm)); + return list_empty(work_q); + } + scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry); id = scmd_id(scmd); - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending target reset " - "to target %d\n", - current->comm, id)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: Sending target reset to target %d\n", + current->comm, id)); rtn = scsi_try_target_reset(scmd); if (rtn != SUCCESS && rtn != FAST_IO_FAIL) - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Target reset" - " failed target: " - "%d\n", - current->comm, id)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: Target reset failed" + " target: %d\n", + current->comm, id)); list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) { if (scmd_id(scmd) != id) continue; @@ -1272,7 +1707,7 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost, /** * scsi_eh_bus_reset - send a bus reset * @shost: &scsi host being recovered. - * @work_q: &list_head for pending commands. + * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. */ static int scsi_eh_bus_reset(struct Scsi_Host *shost, @@ -1282,7 +1717,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, struct scsi_cmnd *scmd, *chan_scmd, *next; LIST_HEAD(check_list); unsigned int channel; - int rtn; + enum scsi_disposition rtn; /* * we really want to loop over the various channels, and do this on @@ -1292,6 +1727,15 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, */ for (channel = 0; channel <= shost->max_channel; channel++) { + if (scsi_host_eh_past_deadline(shost)) { + list_splice_init(&check_list, work_q); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: skip BRST, past eh deadline\n", + current->comm)); + return list_empty(work_q); + } + chan_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) { if (channel == scmd_channel(scmd)) { @@ -1306,9 +1750,10 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, if (!chan_scmd) continue; - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:" - " %d\n", current->comm, - channel)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: Sending BRST chan: %d\n", + current->comm, channel)); rtn = scsi_try_bus_reset(chan_scmd); if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { @@ -1322,10 +1767,10 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, } } } else { - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST" - " failed chan: %d\n", - current->comm, - channel)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: BRST failed chan: %d\n", + current->comm, channel)); } } return scsi_eh_test_devices(&check_list, work_q, done_q, 0); @@ -1333,22 +1778,26 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, /** * scsi_eh_host_reset - send a host reset - * @work_q: list_head for processed commands. - * @done_q: list_head for processed commands. + * @shost: host to be reset. + * @work_q: &list_head for pending commands. 
+ * @done_q: &list_head for processed commands. */ -static int scsi_eh_host_reset(struct list_head *work_q, +static int scsi_eh_host_reset(struct Scsi_Host *shost, + struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; LIST_HEAD(check_list); - int rtn; + enum scsi_disposition rtn; if (!list_empty(work_q)) { scmd = list_entry(work_q->next, struct scsi_cmnd, eh_entry); - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n" - , current->comm)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: Sending HRST\n", + current->comm)); rtn = scsi_try_host_reset(scmd); if (rtn == SUCCESS) { @@ -1358,9 +1807,10 @@ static int scsi_eh_host_reset(struct list_head *work_q, scsi_eh_finish_cmd(scmd, done_q); } } else { - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST" - " failed\n", - current->comm)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "%s: HRST failed\n", + current->comm)); } } return scsi_eh_test_devices(&check_list, work_q, done_q, 1); @@ -1368,62 +1818,70 @@ static int scsi_eh_host_reset(struct list_head *work_q, /** * scsi_eh_offline_sdevs - offline scsi devices that fail to recover - * @work_q: list_head for processed commands. - * @done_q: list_head for processed commands. + * @work_q: &list_head for pending commands. + * @done_q: &list_head for processed commands. */ static void scsi_eh_offline_sdevs(struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; + struct scsi_device *sdev; list_for_each_entry_safe(scmd, next, work_q, eh_entry) { sdev_printk(KERN_INFO, scmd->device, "Device offlined - " "not ready after error recovery\n"); - scsi_device_set_state(scmd->device, SDEV_OFFLINE); - if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) { - /* - * FIXME: Handle lost cmds. - */ - } + sdev = scmd->device; + + mutex_lock(&sdev->state_mutex); + scsi_device_set_state(sdev, SDEV_OFFLINE); + mutex_unlock(&sdev->state_mutex); + scsi_eh_finish_cmd(scmd, done_q); } return; } /** - * scsi_noretry_cmd - determinte if command should be failed fast + * scsi_noretry_cmd - determine if command should be failed fast * @scmd: SCSI cmd to examine. */ -int scsi_noretry_cmd(struct scsi_cmnd *scmd) +bool scsi_noretry_cmd(struct scsi_cmnd *scmd) { + struct request *req = scsi_cmd_to_rq(scmd); + switch (host_byte(scmd->result)) { case DID_OK: break; + case DID_TIME_OUT: + goto check_type; case DID_BUS_BUSY: - return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT); + return !!(req->cmd_flags & REQ_FAILFAST_TRANSPORT); case DID_PARITY: - return (scmd->request->cmd_flags & REQ_FAILFAST_DEV); + return !!(req->cmd_flags & REQ_FAILFAST_DEV); case DID_ERROR: - if (msg_byte(scmd->result) == COMMAND_COMPLETE && - status_byte(scmd->result) == RESERVATION_CONFLICT) - return 0; - /* fall through */ + if (get_status_byte(scmd) == SAM_STAT_RESERVATION_CONFLICT) + return false; + fallthrough; case DID_SOFT_ERROR: - return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER); + return !!(req->cmd_flags & REQ_FAILFAST_DRIVER); } - switch (status_byte(scmd->result)) { - case CHECK_CONDITION: - /* - * assume caller has checked sense and determinted - * the check condition was retryable. 
- */ - if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || - scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) - return 1; - } + /* Never retry commands aborted due to a duration limit timeout */ + if (scsi_ml_byte(scmd->result) == SCSIML_STAT_DL_TIMEOUT) + return true; - return 0; + if (!scsi_status_is_check_condition(scmd->result)) + return false; + +check_type: + /* + * assume caller has checked sense and determined + * the check condition was retryable. + */ + if (req->cmd_flags & REQ_FAILFAST_DEV || blk_rq_is_passthrough(req)) + return true; + + return false; } /** @@ -1440,18 +1898,17 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd) * doesn't require the error handler read (i.e. we don't need to * abort/reset), this function should return SUCCESS. */ -int scsi_decide_disposition(struct scsi_cmnd *scmd) +enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd) { - int rtn; + enum scsi_disposition rtn; /* * if the device is offline, then we clearly just pass the result back * up to the top level. */ if (!scsi_device_online(scmd->device)) { - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report" - " as SUCCESS\n", - __func__)); + SCSI_LOG_ERROR_RECOVERY(5, scmd_printk(KERN_INFO, scmd, + "%s: device offline - report as SUCCESS\n", __func__)); return SUCCESS; } @@ -1473,25 +1930,26 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) * looks good. drop through, and check the next byte. */ break; + case DID_ABORT: + if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { + set_host_byte(scmd, DID_TIME_OUT); + return SUCCESS; + } + fallthrough; case DID_NO_CONNECT: case DID_BAD_TARGET: - case DID_ABORT: /* * note - this means that we just report the status back * to the top level driver, not that we actually think * that it indicates SUCCESS. */ return SUCCESS; + case DID_SOFT_ERROR: /* * when the low level driver returns did_soft_error, * it is responsible for keeping an internal retry counter * in order to avoid endless loops (db) - * - * actually this is a bug in this function here. we should - * be mindful of the maximum number of retries specified - * and not get stuck in a loop. */ - case DID_SOFT_ERROR: goto maybe_retry; case DID_IMM_RETRY: return NEEDS_RETRY; @@ -1513,15 +1971,20 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) * the fast io fail tmo fired), so send IO directly upwards. */ return SUCCESS; + case DID_TRANSPORT_MARGINAL: + /* + * caller has decided not to do retries on + * abort success, so send IO directly upwards + */ + return SUCCESS; case DID_ERROR: - if (msg_byte(scmd->result) == COMMAND_COMPLETE && - status_byte(scmd->result) == RESERVATION_CONFLICT) + if (get_status_byte(scmd) == SAM_STAT_RESERVATION_CONFLICT) /* * execute reservation conflict processing code * lower down */ break; - /* fallthrough */ + fallthrough; case DID_BUS_BUSY: case DID_PARITY: goto maybe_retry; @@ -1544,22 +2007,17 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) } /* - * next, check the message byte. - */ - if (msg_byte(scmd->result) != COMMAND_COMPLETE) - return FAILED; - - /* * check the status byte to see if this indicates anything special. */ - switch (status_byte(scmd->result)) { - case QUEUE_FULL: + switch (get_status_byte(scmd)) { + case SAM_STAT_TASK_SET_FULL: scsi_handle_queue_full(scmd->device); /* * the case of trying to send too many commands to a * tagged queueing device. */ - case BUSY: + fallthrough; + case SAM_STAT_BUSY: /* * device can't talk to us at the moment. 
Should only * occur (SAM-3) when the task queue is empty, so will cause @@ -1567,56 +2025,56 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) * device. */ return ADD_TO_MLQUEUE; - case GOOD: + case SAM_STAT_GOOD: + if (scmd->cmnd[0] == REPORT_LUNS) + scmd->device->sdev_target->expecting_lun_change = 0; scsi_handle_queue_ramp_up(scmd->device); - case COMMAND_TERMINATED: + if (scmd->sense_buffer && SCSI_SENSE_VALID(scmd)) + /* + * If we have sense data, call scsi_check_sense() in + * order to set the correct SCSI ML byte (if any). + * No point in checking the return value, since the + * command has already completed successfully. + */ + scsi_check_sense(scmd); + fallthrough; + case SAM_STAT_COMMAND_TERMINATED: return SUCCESS; - case TASK_ABORTED: + case SAM_STAT_TASK_ABORTED: goto maybe_retry; - case CHECK_CONDITION: + case SAM_STAT_CHECK_CONDITION: rtn = scsi_check_sense(scmd); if (rtn == NEEDS_RETRY) goto maybe_retry; - else if (rtn == TARGET_ERROR) { - /* - * Need to modify host byte to signal a - * permanent target failure - */ - set_host_byte(scmd, DID_TARGET_FAILURE); - rtn = SUCCESS; - } /* if rtn == FAILED, we have no sense information; * returning FAILED will wake the error handler thread * to collect the sense and redo the decide * disposition */ return rtn; - case CONDITION_GOOD: - case INTERMEDIATE_GOOD: - case INTERMEDIATE_C_GOOD: - case ACA_ACTIVE: + case SAM_STAT_CONDITION_MET: + case SAM_STAT_INTERMEDIATE: + case SAM_STAT_INTERMEDIATE_CONDITION_MET: + case SAM_STAT_ACA_ACTIVE: /* * who knows? FIXME(eric) */ return SUCCESS; - case RESERVATION_CONFLICT: + case SAM_STAT_RESERVATION_CONFLICT: sdev_printk(KERN_INFO, scmd->device, "reservation conflict\n"); - set_host_byte(scmd, DID_NEXUS_FAILURE); + set_scsi_ml_byte(scmd, SCSIML_STAT_RESV_CONFLICT); return SUCCESS; /* causes immediate i/o error */ - default: - return FAILED; } return FAILED; - maybe_retry: +maybe_retry: /* we requeue for retry because the error was retryable, and * the request was not marked fast fail. 
Note that above, * even if the request is marked fast fail, we still requeue * for queue congestion conditions (QUEUE_FULL or BUSY) */ - if ((++scmd->retries) <= scmd->allowed - && !scsi_noretry_cmd(scmd)) { + if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) { return NEEDS_RETRY; } else { /* @@ -1626,9 +2084,11 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) } } -static void eh_lock_door_done(struct request *req, int uptodate) +static enum rq_end_io_ret eh_lock_door_done(struct request *req, + blk_status_t status) { - __blk_put_request(req->q, req); + blk_mq_free_request(req); + return RQ_END_IO_NONE; } /** @@ -1644,29 +2104,28 @@ static void eh_lock_door_done(struct request *req, int uptodate) */ static void scsi_eh_lock_door(struct scsi_device *sdev) { + struct scsi_cmnd *scmd; struct request *req; - /* - * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a - * request becomes available - */ - req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); - - req->cmd[0] = ALLOW_MEDIUM_REMOVAL; - req->cmd[1] = 0; - req->cmd[2] = 0; - req->cmd[3] = 0; - req->cmd[4] = SCSI_REMOVAL_PREVENT; - req->cmd[5] = 0; - - req->cmd_len = COMMAND_SIZE(req->cmd[0]); - - req->cmd_type = REQ_TYPE_BLOCK_PC; - req->cmd_flags |= REQ_QUIET; + req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) + return; + scmd = blk_mq_rq_to_pdu(req); + + scmd->cmnd[0] = ALLOW_MEDIUM_REMOVAL; + scmd->cmnd[1] = 0; + scmd->cmnd[2] = 0; + scmd->cmnd[3] = 0; + scmd->cmnd[4] = SCSI_REMOVAL_PREVENT; + scmd->cmnd[5] = 0; + scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); + scmd->allowed = 5; + + req->rq_flags |= RQF_QUIET; req->timeout = 10 * HZ; - req->retries = 5; + req->end_io = eh_lock_door_done; - blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done); + blk_execute_rq_nowait(req, true); } /** @@ -1688,8 +2147,10 @@ static void scsi_restart_operations(struct Scsi_Host *shost) * is no point trying to lock the door of an off-line device. */ shost_for_each_device(sdev, shost) { - if (scsi_device_online(sdev) && sdev->locked) + if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) { scsi_eh_lock_door(sdev); + sdev->was_reset = 0; + } } /* @@ -1697,8 +2158,8 @@ static void scsi_restart_operations(struct Scsi_Host *shost) * will be requests for character device operations, and also for * ioctls to queued block devices. */ - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", - __func__)); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, "waking up host to restart\n")); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RUNNING)) @@ -1733,8 +2194,8 @@ static void scsi_restart_operations(struct Scsi_Host *shost) /** * scsi_eh_ready_devs - check device ready state and recover if not. - * @shost: host to be recovered. - * @work_q: &list_head for pending commands. + * @shost: host to be recovered. + * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. 
*/ void scsi_eh_ready_devs(struct Scsi_Host *shost, @@ -1745,7 +2206,7 @@ void scsi_eh_ready_devs(struct Scsi_Host *shost, if (!scsi_eh_bus_device_reset(shost, work_q, done_q)) if (!scsi_eh_target_reset(shost, work_q, done_q)) if (!scsi_eh_bus_reset(shost, work_q, done_q)) - if (!scsi_eh_host_reset(work_q, done_q)) + if (!scsi_eh_host_reset(shost, work_q, done_q)) scsi_eh_offline_sdevs(work_q, done_q); } @@ -1760,26 +2221,31 @@ void scsi_eh_flush_done_q(struct list_head *done_q) struct scsi_cmnd *scmd, *next; list_for_each_entry_safe(scmd, next, done_q, eh_entry) { + struct scsi_device *sdev = scmd->device; + list_del_init(&scmd->eh_entry); - if (scsi_device_online(scmd->device) && - !scsi_noretry_cmd(scmd) && - (++scmd->retries <= scmd->allowed)) { - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush" - " retry cmd: %p\n", - current->comm, - scmd)); + if (scsi_device_online(sdev) && !scsi_noretry_cmd(scmd) && + scsi_cmd_retry_allowed(scmd) && + scsi_eh_should_retry_cmd(scmd)) { + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "%s: flush retry cmd\n", + current->comm)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + blk_mq_kick_requeue_list(sdev->request_queue); } else { /* * If just we got sense for the device (called * scsi_eh_get_sense), scmd->result is already - * set, do not set DRIVER_TIMEOUT. + * set, do not set DID_TIME_OUT. */ - if (!scmd->result) - scmd->result |= (DRIVER_TIMEOUT << 24); - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish" - " cmd: %p\n", - current->comm, scmd)); + if (!scmd->result && + !(scmd->flags & SCMD_FORCE_EH_SUCCESS)) + scmd->result |= (DID_TIME_OUT << 16); + SCSI_LOG_ERROR_RECOVERY(3, + scmd_printk(KERN_INFO, scmd, + "%s: flush finish cmd\n", + current->comm)); scsi_finish_command(scmd); } } @@ -1822,9 +2288,12 @@ static void scsi_unjam_host(struct Scsi_Host *shost) SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q)); if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q)) - if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)) - scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); + scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); + spin_lock_irqsave(shost->host_lock, flags); + if (shost->eh_deadline != -1) + shost->last_reset = 0; + spin_unlock_irqrestore(shost->host_lock, flags); scsi_eh_flush_done_q(&eh_done_q); } @@ -1846,21 +2315,34 @@ int scsi_error_handler(void *data) * We never actually get interrupted because kthread_run * disables signal delivery for the created thread. */ - while (!kthread_should_stop()) { + while (true) { + /* + * The sequence in kthread_stop() sets the stop flag first + * then wakes the process. To avoid missed wakeups, the task + * should always be in a non running state before the stop + * flag is checked + */ set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || - shost->host_failed != shost->host_busy) { + shost->host_failed != scsi_host_busy(shost)) { SCSI_LOG_ERROR_RECOVERY(1, - printk("Error handler scsi_eh_%d sleeping\n", - shost->host_no)); + shost_printk(KERN_INFO, shost, + "scsi_eh_%d: sleeping\n", + shost->host_no)); schedule(); continue; } __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, - printk("Error handler scsi_eh_%d waking up\n", - shost->host_no)); + shost_printk(KERN_INFO, shost, + "scsi_eh_%d: waking up %d/%d/%d\n", + shost->host_no, shost->host_eh_scheduled, + shost->host_failed, + scsi_host_busy(shost))); /* * We have a host that is failing for some reason. 
Figure out @@ -1869,9 +2351,9 @@ int scsi_error_handler(void *data) */ if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) { SCSI_LOG_ERROR_RECOVERY(1, - printk(KERN_ERR "Error handler scsi_eh_%d " - "unable to autoresume\n", - shost->host_no)); + shost_printk(KERN_ERR, shost, + "scsi_eh_%d: unable to autoresume\n", + shost->host_no)); continue; } @@ -1880,6 +2362,9 @@ int scsi_error_handler(void *data) else scsi_unjam_host(shost); + /* All scmds have been handled */ + shost->host_failed = 0; + /* * Note - if the above fails completely, the action is to take * individual devices offline and flush the queue of any @@ -1894,19 +2379,21 @@ int scsi_error_handler(void *data) __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, - printk("Error handler scsi_eh_%d exiting\n", shost->host_no)); + shost_printk(KERN_INFO, shost, + "Error handler scsi_eh_%d exiting\n", + shost->host_no)); shost->ehandler = NULL; return 0; } -/* - * Function: scsi_report_bus_reset() +/** + * scsi_report_bus_reset() - report bus reset observed * - * Purpose: Utility function used by low-level drivers to report that - * they have observed a bus reset on the bus being handled. + * Utility function used by low-level drivers to report that + * they have observed a bus reset on the bus being handled. * - * Arguments: shost - Host in question - * channel - channel on which reset was observed. + * @shost: Host in question + * @channel: channel on which reset was observed. * * Returns: Nothing * @@ -1931,15 +2418,15 @@ void scsi_report_bus_reset(struct Scsi_Host *shost, int channel) } EXPORT_SYMBOL(scsi_report_bus_reset); -/* - * Function: scsi_report_device_reset() +/** + * scsi_report_device_reset() - report device reset observed * - * Purpose: Utility function used by low-level drivers to report that - * they have observed a device reset on the device being handled. + * Utility function used by low-level drivers to report that + * they have observed a device reset on the device being handled. * - * Arguments: shost - Host in question - * channel - channel on which reset was observed - * target - target on which reset was observed + * @shost: Host in question + * @channel: channel on which reset was observed + * @target: target on which reset was observed * * Returns: Nothing * @@ -1965,43 +2452,43 @@ void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target) } EXPORT_SYMBOL(scsi_report_device_reset); -static void -scsi_reset_provider_done_command(struct scsi_cmnd *scmd) -{ -} - -/* - * Function: scsi_reset_provider - * - * Purpose: Send requested reset to a bus or device at any phase. - * - * Arguments: device - device to send reset to - * flag - reset type (see scsi.h) - * - * Returns: SUCCESS/FAILURE. - * - * Notes: This is used by the SCSI Generic driver to provide - * Bus/Device reset capability. 
+/** + * scsi_ioctl_reset: explicitly reset a host/bus/target/device + * @dev: scsi_device to operate on + * @arg: reset type (see sg.h) */ int -scsi_reset_provider(struct scsi_device *dev, int flag) +scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) { struct scsi_cmnd *scmd; struct Scsi_Host *shost = dev->host; - struct request req; + struct request *rq; unsigned long flags; - int rtn; + int error = 0, val; + enum scsi_disposition rtn; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + + error = get_user(val, arg); + if (error) + return error; if (scsi_autopm_get_host(shost) < 0) - return FAILED; + return -EIO; - scmd = scsi_get_command(dev, GFP_KERNEL); - blk_rq_init(NULL, &req); - scmd->request = &req; + error = -EIO; + rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) + + shost->hostt->cmd_size, GFP_KERNEL); + if (!rq) + goto out_put_autopm_host; + blk_rq_init(NULL, rq); - scmd->cmnd = req.cmd; + scmd = (struct scsi_cmnd *)(rq + 1); + scsi_init_command(dev, scmd); - scmd->scsi_done = scsi_reset_provider_done_command; + scmd->submitter = SUBMITTED_BY_SCSI_RESET_IOCTL; + scmd->flags |= SCMD_LAST; memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->cmd_len = 0; @@ -2012,29 +2499,37 @@ scsi_reset_provider(struct scsi_device *dev, int flag) shost->tmf_in_progress = 1; spin_unlock_irqrestore(shost->host_lock, flags); - switch (flag) { - case SCSI_TRY_RESET_DEVICE: + switch (val & ~SG_SCSI_RESET_NO_ESCALATE) { + case SG_SCSI_RESET_NOTHING: + rtn = SUCCESS; + break; + case SG_SCSI_RESET_DEVICE: rtn = scsi_try_bus_device_reset(scmd); - if (rtn == SUCCESS) + if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) break; - /* FALLTHROUGH */ - case SCSI_TRY_RESET_TARGET: + fallthrough; + case SG_SCSI_RESET_TARGET: rtn = scsi_try_target_reset(scmd); - if (rtn == SUCCESS) + if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) break; - /* FALLTHROUGH */ - case SCSI_TRY_RESET_BUS: + fallthrough; + case SG_SCSI_RESET_BUS: rtn = scsi_try_bus_reset(scmd); - if (rtn == SUCCESS) + if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) break; - /* FALLTHROUGH */ - case SCSI_TRY_RESET_HOST: + fallthrough; + case SG_SCSI_RESET_HOST: rtn = scsi_try_host_reset(scmd); - break; + if (rtn == SUCCESS) + break; + fallthrough; default: rtn = FAILED; + break; } + error = (rtn == SUCCESS) ? 0 : -EIO; + spin_lock_irqsave(shost->host_lock, flags); shost->tmf_in_progress = 0; spin_unlock_irqrestore(shost->host_lock, flags); @@ -2044,85 +2539,21 @@ scsi_reset_provider(struct scsi_device *dev, int flag) * suspended while we performed the TMF. */ SCSI_LOG_ERROR_RECOVERY(3, - printk("%s: waking up host to restart after TMF\n", - __func__)); + shost_printk(KERN_INFO, shost, + "waking up host to restart after TMF\n")); wake_up(&shost->host_wait); - scsi_run_host_queues(shost); - scsi_next_command(scmd); - scsi_autopm_put_host(shost); - return rtn; -} -EXPORT_SYMBOL(scsi_reset_provider); - -/** - * scsi_normalize_sense - normalize main elements from either fixed or - * descriptor sense data format into a common format. - * - * @sense_buffer: byte array containing sense data returned by device - * @sb_len: number of valid bytes in sense_buffer - * @sshdr: pointer to instance of structure that common - * elements are written to. - * - * Notes: - * The "main elements" from sense data are: response_code, sense_key, - * asc, ascq and additional_length (only for descriptor format). 
- * - * Typically this function can be called after a device has - * responded to a SCSI command with the CHECK_CONDITION status. - * - * Return value: - * 1 if valid sense data information found, else 0; - */ -int scsi_normalize_sense(const u8 *sense_buffer, int sb_len, - struct scsi_sense_hdr *sshdr) -{ - if (!sense_buffer || !sb_len) - return 0; - - memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); - - sshdr->response_code = (sense_buffer[0] & 0x7f); - - if (!scsi_sense_valid(sshdr)) - return 0; - - if (sshdr->response_code >= 0x72) { - /* - * descriptor format - */ - if (sb_len > 1) - sshdr->sense_key = (sense_buffer[1] & 0xf); - if (sb_len > 2) - sshdr->asc = sense_buffer[2]; - if (sb_len > 3) - sshdr->ascq = sense_buffer[3]; - if (sb_len > 7) - sshdr->additional_length = sense_buffer[7]; - } else { - /* - * fixed format - */ - if (sb_len > 2) - sshdr->sense_key = (sense_buffer[2] & 0xf); - if (sb_len > 7) { - sb_len = (sb_len < (sense_buffer[7] + 8)) ? - sb_len : (sense_buffer[7] + 8); - if (sb_len > 12) - sshdr->asc = sense_buffer[12]; - if (sb_len > 13) - sshdr->ascq = sense_buffer[13]; - } - } + kfree(rq); - return 1; +out_put_autopm_host: + scsi_autopm_put_host(shost); + return error; } -EXPORT_SYMBOL(scsi_normalize_sense); -int scsi_command_normalize_sense(struct scsi_cmnd *cmd, - struct scsi_sense_hdr *sshdr) +bool scsi_command_normalize_sense(const struct scsi_cmnd *cmd, + struct scsi_sense_hdr *sshdr) { return scsi_normalize_sense(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, sshdr); @@ -2130,45 +2561,6 @@ int scsi_command_normalize_sense(struct scsi_cmnd *cmd, EXPORT_SYMBOL(scsi_command_normalize_sense); /** - * scsi_sense_desc_find - search for a given descriptor type in descriptor sense data format. - * @sense_buffer: byte array of descriptor format sense data - * @sb_len: number of valid bytes in sense_buffer - * @desc_type: value of descriptor type to find - * (e.g. 0 -> information) - * - * Notes: - * only valid when sense data is in descriptor format - * - * Return value: - * pointer to start of (first) descriptor if found else NULL - */ -const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, - int desc_type) -{ - int add_sen_len, add_len, desc_len, k; - const u8 * descp; - - if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7]))) - return NULL; - if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73)) - return NULL; - add_sen_len = (add_sen_len < (sb_len - 8)) ? - add_sen_len : (sb_len - 8); - descp = &sense_buffer[8]; - for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) { - descp += desc_len; - add_len = (k < (add_sen_len - 1)) ? descp[1]: -1; - desc_len = add_len + 2; - if (descp[0] == desc_type) - return descp; - if (add_len < 0) // short descriptor ?? - break; - } - return NULL; -} -EXPORT_SYMBOL(scsi_sense_desc_find); - -/** * scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format) * @sense_buffer: byte array of sense data * @sb_len: number of valid bytes in sense_buffer @@ -2176,72 +2568,34 @@ EXPORT_SYMBOL(scsi_sense_desc_find); * field will be placed if found. * * Return value: - * 1 if information field found, 0 if not found. + * true if information field found, false if not found. 
*/ -int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, - u64 * info_out) +bool scsi_get_sense_info_fld(const u8 *sense_buffer, int sb_len, + u64 *info_out) { - int j; const u8 * ucp; - u64 ull; if (sb_len < 7) - return 0; + return false; switch (sense_buffer[0] & 0x7f) { case 0x70: case 0x71: if (sense_buffer[0] & 0x80) { - *info_out = (sense_buffer[3] << 24) + - (sense_buffer[4] << 16) + - (sense_buffer[5] << 8) + sense_buffer[6]; - return 1; - } else - return 0; + *info_out = get_unaligned_be32(&sense_buffer[3]); + return true; + } + return false; case 0x72: case 0x73: ucp = scsi_sense_desc_find(sense_buffer, sb_len, 0 /* info desc */); if (ucp && (0xa == ucp[1])) { - ull = 0; - for (j = 0; j < 8; ++j) { - if (j > 0) - ull <<= 8; - ull |= ucp[4 + j]; - } - *info_out = ull; - return 1; - } else - return 0; + *info_out = get_unaligned_be64(&ucp[4]); + return true; + } + return false; default: - return 0; + return false; } } EXPORT_SYMBOL(scsi_get_sense_info_fld); - -/** - * scsi_build_sense_buffer - build sense data in a buffer - * @desc: Sense format (non zero == descriptor format, - * 0 == fixed format) - * @buf: Where to build sense data - * @key: Sense key - * @asc: Additional sense code - * @ascq: Additional sense code qualifier - * - **/ -void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq) -{ - if (desc) { - buf[0] = 0x72; /* descriptor, current */ - buf[1] = key; - buf[2] = asc; - buf[3] = ascq; - buf[7] = 0; - } else { - buf[0] = 0x70; /* fixed, current */ - buf[2] = key; - buf[7] = 0xa; - buf[12] = asc; - buf[13] = ascq; - } -} -EXPORT_SYMBOL(scsi_build_sense_buffer); |
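A note on reading the scsi_decide_disposition() hunks above: the command's result word packs the LLD's host byte (DID_*) and the SCSI status byte (SAM_STAT_*) into a single integer, and the two switch statements inspect those fields in turn. The toy program below decomposes such a word the same way; it is a sketch rather than the kernel's own macros, make_result() is an invented helper, and the constant values are copied from the kernel's SCSI headers.

#include <stdio.h>

/* values taken from the kernel's SCSI headers */
#define DID_OK				0x00
#define DID_BUS_BUSY			0x02
#define DID_TIME_OUT			0x03
#define SAM_STAT_GOOD			0x00
#define SAM_STAT_CHECK_CONDITION	0x02
#define SAM_STAT_TASK_SET_FULL		0x28

/* made-up helper: pack a host byte and a status byte the way the midlayer does */
static unsigned int make_result(unsigned int host_byte, unsigned int status_byte)
{
	return (host_byte << 16) | status_byte;
}

int main(void)
{
	unsigned int result = make_result(DID_OK, SAM_STAT_CHECK_CONDITION);

	printf("host byte 0x%02x, status byte 0x%02x\n",
	       (result >> 16) & 0xff, result & 0xff);
	return 0;
}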
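The reworked scsi_eh_lock_door() above allocates the request with scsi_alloc_request() and fills the six-byte PREVENT ALLOW MEDIUM REMOVAL CDB directly in the scsi_cmnd. For comparison, roughly the same CDB can be issued from user space through the SG_IO ioctl; the sketch below assumes a placeholder /dev/sg0 node and uses a 10 second timeout to mirror the 10 * HZ used in the kernel path.

#include <fcntl.h>
#include <scsi/sg.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	/* PREVENT ALLOW MEDIUM REMOVAL, prevent = 1, as in scsi_eh_lock_door() */
	unsigned char cdb[6] = { 0x1e, 0, 0, 0, 1, 0 };
	unsigned char sense[32];
	struct sg_io_hdr io;
	int fd = open("/dev/sg0", O_RDWR | O_NONBLOCK);	/* placeholder node */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&io, 0, sizeof(io));
	io.interface_id = 'S';
	io.cmd_len = sizeof(cdb);
	io.cmdp = cdb;
	io.dxfer_direction = SG_DXFER_NONE;	/* no data phase for this CDB */
	io.sbp = sense;
	io.mx_sb_len = sizeof(sense);
	io.timeout = 10000;			/* milliseconds */

	if (ioctl(fd, SG_IO, &io) < 0)
		perror("SG_IO");
	else
		printf("status 0x%x, host_status 0x%x\n",
		       io.status, io.host_status);

	close(fd);
	return 0;
}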
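The comment added to the scsi_error_handler() loop explains why the task state must be set before kthread_should_stop() is tested: kthread_stop() raises the stop flag and then wakes the thread, so checking the flag while still runnable could miss that wakeup. The fragment below is a hypothetical, self-contained module (worker_fn, demo_init and demo_exit are invented names) showing the same ordering in isolation; it illustrates the pattern only and is not SCSI code.

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>

static struct task_struct *worker;

static int worker_fn(void *data)
{
	for (;;) {
		/*
		 * Go non-running *before* checking the stop flag, so a
		 * kthread_stop() that set the flag and called
		 * wake_up_process() in between cannot be missed.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();		/* sleep until woken */
		/* ... do the actual work here ... */
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static int __init demo_init(void)
{
	worker = kthread_run(worker_fn, NULL, "demo_eh");
	return IS_ERR(worker) ? PTR_ERR(worker) : 0;
}

static void __exit demo_exit(void)
{
	kthread_stop(worker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");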
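scsi_ioctl_reset() above is driven by the SG_SCSI_RESET ioctl: the low bits select the starting level (device, target, bus, host) and SG_SCSI_RESET_NO_ESCALATE suppresses the fall-through to the next level on failure. A minimal user-space caller might look like the sketch below; /dev/sg0 is a placeholder, the NO_ESCALATE define is provided in case an older installed sg.h lacks it, and the process needs both CAP_SYS_ADMIN and CAP_SYS_RAWIO or the ioctl fails with EACCES.

#include <fcntl.h>
#include <scsi/sg.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#ifndef SG_SCSI_RESET_NO_ESCALATE
#define SG_SCSI_RESET_NO_ESCALATE 0x100	/* from the kernel's scsi/sg.h */
#endif

int main(void)
{
	int fd = open("/dev/sg0", O_RDWR | O_NONBLOCK);	/* placeholder node */
	/* device (LU) reset only; do not escalate to target/bus/host */
	int val = SG_SCSI_RESET_DEVICE | SG_SCSI_RESET_NO_ESCALATE;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, SG_SCSI_RESET, &val) < 0)
		perror("SG_SCSI_RESET");
	else
		printf("reset issued\n");

	close(fd);
	return 0;
}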
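Finally, the rewritten scsi_get_sense_info_fld() reads the INFORMATION field with the unaligned big-endian helpers instead of open-coded shift loops: 4 bytes at offset 3 for fixed-format sense (when the VALID bit is set), or 8 bytes from the type 0x00 descriptor in descriptor-format sense. The stand-alone parser below mimics that logic in user space; be32(), be64() and sense_info_fld() are invented names, and the sample buffer in main() is fabricated for the demonstration.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

static uint64_t be64(const uint8_t *p)
{
	return ((uint64_t)be32(p) << 32) | be32(p + 4);
}

/* Return true and fill *info when the sense buffer carries an INFORMATION field. */
static bool sense_info_fld(const uint8_t *sb, size_t len, uint64_t *info)
{
	if (len < 7)
		return false;

	switch (sb[0] & 0x7f) {
	case 0x70:
	case 0x71:				/* fixed format */
		if (!(sb[0] & 0x80))		/* VALID bit not set */
			return false;
		*info = be32(&sb[3]);
		return true;
	case 0x72:
	case 0x73: {				/* descriptor format */
		size_t add = sb[7];		/* additional sense length */
		size_t k = 8;			/* descriptors start at byte 8 */

		while (k + 1 < len && k < 8 + add) {
			size_t dlen = sb[k + 1] + 2;

			/* type 0x00: information descriptor, 12 bytes long */
			if (sb[k] == 0x00 && dlen >= 12 && k + 12 <= len) {
				*info = be64(&sb[k + 4]);
				return true;
			}
			k += dlen;
		}
		return false;
	}
	default:
		return false;
	}
}

int main(void)
{
	/* fabricated fixed-format sense: VALID bit set, INFORMATION = 0x12345678 */
	uint8_t sb[18] = { 0xf0, 0x00, 0x03, 0x12, 0x34, 0x56, 0x78, 0x0a };
	uint64_t info;

	if (sense_info_fld(sb, sizeof(sb), &info))
		printf("INFORMATION field: 0x%llx\n", (unsigned long long)info);
	return 0;
}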
