summaryrefslogtreecommitdiff
path: root/drivers/accel/habanalabs/common/device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/habanalabs/common/device.c')
-rw-r--r--drivers/accel/habanalabs/common/device.c25
1 files changed, 15 insertions, 10 deletions
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 9711e8fc979d..a365791a9f5c 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -853,6 +853,9 @@ static int device_early_init(struct hl_device *hdev)
gaudi2_set_asic_funcs(hdev);
strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
break;
+ case ASIC_GAUDI2C:
+ gaudi2_set_asic_funcs(hdev);
+ strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name));
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@@ -1041,18 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
}
-static void hl_device_eq_heartbeat(struct hl_device *hdev)
+static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
{
- u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct asic_fixed_properties *prop = &hdev->asic_prop;
if (!prop->cpucp_info.eq_health_check_supported)
- return;
+ return 0;
- if (hdev->eq_heartbeat_received)
+ if (hdev->eq_heartbeat_received) {
hdev->eq_heartbeat_received = false;
- else
- hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+ } else {
+ dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
+ return -EIO;
+ }
+
+ return 0;
}
static void hl_device_heartbeat(struct work_struct *work)
@@ -1069,10 +1075,9 @@ static void hl_device_heartbeat(struct work_struct *work)
/*
* For EQ health check need to check if driver received the heartbeat eq event
* in order to validate the eq is working.
+ * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule.
*/
- hl_device_eq_heartbeat(hdev);
-
- if (!hdev->asic_funcs->send_heartbeat(hdev))
+ if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
goto reschedule;
if (hl_device_operational(hdev, NULL))
@@ -2035,7 +2040,7 @@ device_reset:
if (ctx)
hl_ctx_put(ctx);
- return hl_device_reset(hdev, flags);
+ return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD);
}
static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)