diff options
author | Ofir Bitton <obitton@habana.ai> | 2023-07-25 21:11:56 +0300 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2023-10-09 12:37:21 +0300 |
commit | d261b0ab131e2511b70f7bc4a3737d3b90ca6e87 (patch) | |
tree | e860450dfb5b564ccab5f7bd32a0b5feba708390 /drivers/accel/habanalabs/gaudi2 | |
parent | 10d260f655c1af1b5a5e7d0cea001e3d0461aeaa (diff) |
accel/habanalabs/gaudi2: include block id in ECC error reporting
During ECC event handling, Memory wrapper id was mistakenly
printed as block id. Fix the print and in addition fetch the actual
block-id from firmware.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/accel/habanalabs/gaudi2')
-rw-r--r-- | drivers/accel/habanalabs/gaudi2/gaudi2.c | 23 |
1 files changed, 18 insertions, 5 deletions
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index d60389b6700f..dca19be42d5f 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7834,16 +7834,29 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, struct hl_eq_ecc_data *ecc_data) { - u64 ecc_address = 0, ecc_syndrom = 0; + u64 ecc_address = 0, ecc_syndrome = 0; u8 memory_wrapper_idx = 0; + bool has_block_id = false; + u16 block_id; + + if (!hl_is_fw_sw_ver_below(hdev, 1, 12)) + has_block_id = true; ecc_address = le64_to_cpu(ecc_data->ecc_address); - ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); + ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom); memory_wrapper_idx = ecc_data->memory_wrapper_idx; - gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, - "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.", - ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); + if (has_block_id) { + block_id = le16_to_cpu(ecc_data->block_id); + gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, + "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.", + ecc_address, ecc_syndrome, memory_wrapper_idx, block_id, + ecc_data->is_critical); + } else { + gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, + "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.", + ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical); + } return !!ecc_data->is_critical; } |