From 3f6194af539464d83b29ed347aceddb336a3625c Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Tue, 16 Oct 2018 23:37:39 -0700 Subject: scsi: megaraid_sas: Add watchdog thread to detect Firmware fault Currently driver checks for Firmware state change from ISR context, and only when there are interrupts tied with no I/O completions. We have seen multiple cases where doorbell interrupts sent by firmware to indicate FW state change are not processed by driver and it takes long time for driver to trigger OCR. And if there are no IOs running, since we only check the FW state as part of ISR code, fault goes undetected by driver and OCR will not be triggered. This patch introduces a separate workqueue that runs every one second to detect Firmware FAULT state and trigger reset immediately. As an additional gain, removing PCI reads from ISR to check FW state results in improved performance as well. Signed-off-by: Sumit Saxena Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 34 +++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'drivers/scsi/megaraid/megaraid_sas_base.c') diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 9b90c716f06d..4dc29e055461 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5582,8 +5582,20 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->skip_heartbeat_timer_del = 1; } + /* + * Create and start watchdog thread which will monitor + * controller state every 1 sec and trigger OCR when + * it enters fault state + */ + if (instance->adapter_type != MFI_SERIES) + if (megasas_fusion_start_watchdog(instance) != SUCCESS) + goto fail_start_watchdog; + return 0; +fail_start_watchdog: + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); fail_get_ld_pd_list: instance->instancet->disable_intr(instance); fail_init_adapter: @@ -6434,12 +6446,10 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) instance->disableOnlineCtrlReset = 1; instance->UnevenSpanSupport = 0; - if (instance->adapter_type != MFI_SERIES) { + if (instance->adapter_type != MFI_SERIES) INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq); - INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq); - } else { + else INIT_WORK(&instance->work_init, process_fw_state_change_wq); - } } /** @@ -6708,6 +6718,10 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); + /* Stop the FW fault detection watchdog */ + if (instance->adapter_type != MFI_SERIES) + megasas_fusion_stop_watchdog(instance); + megasas_flush_cache(instance); megasas_shutdown_controller(instance, MR_DCMD_HIBERNATE_SHUTDOWN); @@ -6843,8 +6857,16 @@ megasas_resume(struct pci_dev *pdev) if (megasas_start_aen(instance)) dev_err(&instance->pdev->dev, "Start AEN failed\n"); + /* Re-launch FW fault watchdog */ + if (instance->adapter_type != MFI_SERIES) + if (megasas_fusion_start_watchdog(instance) != SUCCESS) + goto fail_start_watchdog; + return 0; +fail_start_watchdog: + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); fail_init_mfi: megasas_free_ctrl_dma_buffers(instance); megasas_free_ctrl_mem(instance); @@ -6912,6 +6934,10 @@ static void megasas_detach_one(struct pci_dev *pdev) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); + /* Stop the FW fault detection watchdog */ + if (instance->adapter_type != MFI_SERIES) + megasas_fusion_stop_watchdog(instance); + if (instance->fw_crash_state != UNAVAILABLE) megasas_free_host_crash_buffer(instance); scsi_remove_host(instance->host); -- cgit