From 4380e64a94e16c757552e8e2fbdc856415012fc8 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 9 Jan 2024 20:40:14 +0800 Subject: scsi: core: Move autosuspend timer delay to Scsi_Host The runtime suspend timer delay is a const value in scsi_host_template which a host driver cannot modify at runtime. Move the delay to Scsi_Host to allow a driver to update it. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20240109124015.31359-2-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0833b3e6aa6e..d1b87670764c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3728,7 +3728,7 @@ static int sd_probe(struct device *dev) blk_pm_runtime_init(sdp->request_queue, dev); if (sdp->rpm_autosuspend) { pm_runtime_set_autosuspend_delay(dev, - sdp->host->hostt->rpm_autosuspend_delay); + sdp->host->rpm_autosuspend_delay); } error = device_add_disk(dev, gd, NULL); -- cgit From 1008f5776fe5c398e1202c93b835943b04de3ec6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:05 -0600 Subject: scsi: sd: Use separate buf for START_STOP in sd_spinup_disk() We currently reuse the cmd buffer for the TUR and START_STOP commands which requires us to reset the buffer when retrying. This has us use separate buffers for the 2 commands so we can make them const and I think it makes it easier to handle for retries but does not add too much extra to the stack use. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-5-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Martin Wilck Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0833b3e6aa6e..3d85913d373c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2318,14 +2318,16 @@ sd_spinup_disk(struct scsi_disk *sdkp) * Issue command to spin up drive when not ready */ if (!spintime) { + /* Return immediately and start spin cycle */ + const u8 start_cmd[10] = { + [0] = START_STOP, + [1] = 1, + [4] = sdkp->device->start_stop_pwr_cond ? + 0x11 : 1, + }; + sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); - cmd[0] = START_STOP; - cmd[1] = 1; /* Return immediately */ - memset((void *) &cmd[2], 0, 8); - cmd[4] = 1; /* Start spin cycle */ - if (sdkp->device->start_stop_pwr_cond) - cmd[4] |= 1 << 4; - scsi_execute_cmd(sdkp->device, cmd, + scsi_execute_cmd(sdkp->device, start_cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); -- cgit From c1acf38cd11efdc921f7d41107b00c2cb79453fc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:06 -0600 Subject: scsi: sd: Have midlayer retry sd_spinup_disk() errors This simplifies sd_spinup_disk() so the SCSI midlayer retries errors for it. Note that we retried every UA except Medium Not Present and also if scsi_status_is_good() returned failed which would happen for all check conditions. In this patch we use SCMD_FAILURE_STAT_ANY which will trigger for the same conditions as when scsi_status_is_good() returns false and there is status. This will cover all CCs including UAs so there is no explicit failures array entry for UAs except for Medium Not Present which we don't want to retry. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. We do not handle the outside loop's retries because we want to sleep between tries and we don't support that yet. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-6-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 77 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 32 deletions(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3d85913d373c..cb240015bde5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2235,55 +2235,68 @@ static int sd_done(struct scsi_cmnd *SCpnt) static void sd_spinup_disk(struct scsi_disk *sdkp) { - unsigned char cmd[10]; + static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; - int retries, spintime; + int spintime, sense_valid = 0; unsigned int the_result; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + /* Do not retry Medium Not Present */ + { + .sense = UNIT_ATTENTION, + .asc = 0x3A, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = 0x3A, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Retry when scsi_status_is_good would return false 3 times */ + { + .result = SCMD_FAILURE_STAT_ANY, + .allowed = 3, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; - int sense_valid = 0; spintime = 0; /* Spin up drives, as required. Only do this at boot time */ /* Spinup needs to be done for module loads too. */ do { - retries = 0; + bool media_was_present = sdkp->media_present; - do { - bool media_was_present = sdkp->media_present; + scsi_failures_reset_retries(&failures); - cmd[0] = TEST_UNIT_READY; - memset((void *) &cmd[1], 0, 9); - - the_result = scsi_execute_cmd(sdkp->device, cmd, - REQ_OP_DRV_IN, NULL, 0, - SD_TIMEOUT, - sdkp->max_retries, - &exec_args); + the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, + NULL, 0, SD_TIMEOUT, + sdkp->max_retries, &exec_args); - if (the_result > 0) { - /* - * If the drive has indicated to us that it - * doesn't have any media in it, don't bother - * with any more polling. - */ - if (media_not_present(sdkp, &sshdr)) { - if (media_was_present) - sd_printk(KERN_NOTICE, sdkp, - "Media removed, stopped polling\n"); - return; - } - sense_valid = scsi_sense_valid(&sshdr); + if (the_result > 0) { + /* + * If the drive has indicated to us that it doesn't + * have any media in it, don't bother with any more + * polling. + */ + if (media_not_present(sdkp, &sshdr)) { + if (media_was_present) + sd_printk(KERN_NOTICE, sdkp, + "Media removed, stopped polling\n"); + return; } - retries++; - } while (retries < 3 && - (!scsi_status_is_good(the_result) || - (scsi_status_is_check_condition(the_result) && - sense_valid && sshdr.sense_key == UNIT_ATTENTION))); + sense_valid = scsi_sense_valid(&sshdr); + } if (!scsi_status_is_check_condition(the_result)) { /* no sense, TUR either succeeded or failed -- cgit From 183053203d4532431bfdbddc04dd9306a03164a5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:10 -0600 Subject: scsi: sd: Have midlayer retry sd_sync_cache() errors This has sd_sync_cache() have the SCSI midlayer retry errors instead of driving them itself. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-10-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb240015bde5..c2068d83c812 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1645,36 +1645,35 @@ out: static int sd_sync_cache(struct scsi_disk *sdkp) { - int retries, res; + int res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; + /* Leave the rest of the command zero to indicate flush everything. */ + const unsigned char cmd[16] = { sdp->use_16_for_sync ? + SYNCHRONIZE_CACHE_16 : SYNCHRONIZE_CACHE }; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + .allowed = 3, + .result = SCMD_FAILURE_RESULT_ANY, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, .sshdr = &sshdr, + .failures = &failures, }; if (!scsi_device_online(sdp)) return -ENODEV; - for (retries = 3; retries > 0; --retries) { - unsigned char cmd[16] = { 0 }; - - if (sdp->use_16_for_sync) - cmd[0] = SYNCHRONIZE_CACHE_16; - else - cmd[0] = SYNCHRONIZE_CACHE; - /* - * Leave the rest of the command zero to indicate - * flush everything. - */ - res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, - timeout, sdkp->max_retries, &exec_args); - if (res == 0) - break; - } - + res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, + sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Synchronize Cache(10) failed", res); -- cgit From eea6ef3792e34bd9476bef2fad074a8ce24915ec Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:15 -0600 Subject: scsi: sd: Have pr commands retry UAs It's common to get a UA when doing PR commands. It could be due to a target restarting, transport level relogin or other PR commands like a release causing it. The upper layers don't get the sense and in some cases have no idea if it's a SCSI device, so this has the sd layer retry. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-15-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c2068d83c812..4196f722c3f6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1800,8 +1800,22 @@ static int sd_pr_in_command(struct block_device *bdev, u8 sa, struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa }; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = 5, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; int result; @@ -1888,8 +1902,22 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + { + .sense = UNIT_ATTENTION, + .asc = SCMD_FAILURE_ASC_ANY, + .ascq = SCMD_FAILURE_ASCQ_ANY, + .allowed = 5, + .result = SAM_STAT_CHECK_CONDITION, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; int result; u8 cmd[16] = { 0, }; -- cgit From 0f11328f2f46618c8c4734041fdb2aacfa99b802 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 22 Jan 2024 18:22:16 -0600 Subject: scsi: sd: Have midlayer retry read_capacity_10() errors This has read_capacity_10() have the SCSI midlayer retry errors instead of driving them itself. There are 2 behavior changes with this patch: 1. There is one behavior change where we no longer retry when scsi_execute_cmd() returns < 0, but we should be ok. We don't need to retry for failures like the queue being removed, and for the case where there are no tags/reqs since the block layer waits/retries for us. For possible memory allocation failures from blk_rq_map_kern() we use GFP_NOIO, so retrying will probably not help. 2. For the specific UAs we checked for and retried, we would get READ_CAPACITY_RETRIES_ON_RESET retries plus whatever retries were left from the main loop's retries. Each UA now gets READ_CAPACITY_RETRIES_ON_RESET retries, and the other errors get up to 3 retries. This is most likely ok, because READ_CAPACITY_RETRIES_ON_RESET is already 10 and is not based on anything specific like a spec or device, so the extra 3 we got from the main loop was probably just an accident and is not going to help. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240123002220.129141-16-michael.christie@oracle.com Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 62 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 23 deletions(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4196f722c3f6..49159dcd638d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2588,42 +2588,58 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { - unsigned char cmd[16]; + static const u8 cmd[10] = { READ_CAPACITY }; struct scsi_sense_hdr sshdr; + struct scsi_failure failure_defs[] = { + /* Do not retry Medium Not Present */ + { + .sense = UNIT_ATTENTION, + .asc = 0x3A, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = 0x3A, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Device reset might occur several times so retry a lot */ + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + .allowed = READ_CAPACITY_RETRIES_ON_RESET, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Any other error not listed above retry 3 times */ + { + .result = SCMD_FAILURE_RESULT_ANY, + .allowed = 3, + }, + {} + }; + struct scsi_failures failures = { + .failure_definitions = failure_defs, + }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, + .failures = &failures, }; int sense_valid = 0; int the_result; - int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; sector_t lba; unsigned sector_size; - do { - cmd[0] = READ_CAPACITY; - memset(&cmd[1], 0, 9); - memset(buffer, 0, 8); + memset(buffer, 0, 8); - the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, - 8, SD_TIMEOUT, sdkp->max_retries, - &exec_args); + the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, + 8, SD_TIMEOUT, sdkp->max_retries, + &exec_args); + + if (the_result > 0) { + sense_valid = scsi_sense_valid(&sshdr); if (media_not_present(sdkp, &sshdr)) return -ENODEV; - - if (the_result > 0) { - sense_valid = scsi_sense_valid(&sshdr); - if (sense_valid && - sshdr.sense_key == UNIT_ATTENTION && - sshdr.asc == 0x29 && sshdr.ascq == 0x00) - /* Device reset might occur several times, - * give it one more chance */ - if (--reset_retries > 0) - continue; - } - retries--; - - } while (the_result && retries); + } if (the_result) { sd_print_result(sdkp, "Read Capacity(10) failed", the_result); -- cgit