summaryrefslogtreecommitdiff
path: root/include/linux/mlx5/device.h
diff options
context:
space:
mode:
authorAya Levin <ayal@nvidia.com>2021-10-11 13:14:28 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2021-10-25 13:51:19 -0700
commitcb464ba53c0cb497dcb4a3daaf4fad4b75291863 (patch)
tree8a768ef1c4e8d3984505f7bb8a5dbfa28843a488 /include/linux/mlx5/device.h
parent2fdeb4f4c2aea53a6a2e8adb69e811cf304a0ae5 (diff)
net/mlx5: Extend health buffer dump
Enhance health buffer to include: - assert_var5: expose the 6th assert variable. - time: error's time-stamp in seconds (epoch time). - rfr: Recovery Flow Required. When set, indicates that the error cannot be recovered without a flow involving reset. - severity: error's severity value, ranging from emergency to debug. Expose them in the health buffer dump (dmesg and devlink fw reporter). Health buffer in dmesg: mlx5_core 0000:08:00.0: print_health_info:425:(pid 912): Health issue observed, firmware internal error, severity(3) ERROR: mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[0] 0x08040700 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[1] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[2] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[3] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[4] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[5] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:432:(pid 912): assert_exit_ptr 0x00aaf800 mlx5_core 0000:08:00.0: print_health_info:434:(pid 912): assert_callra 0x00aaf70c mlx5_core 0000:08:00.0: print_health_info:436:(pid 912): fw_ver 16.32.492 mlx5_core 0000:08:00.0: print_health_info:437:(pid 912): time 1634819758 mlx5_core 0000:08:00.0: print_health_info:438:(pid 912): hw_id 0x0000020d mlx5_core 0000:08:00.0: print_health_info:439:(pid 912): rfr 0 mlx5_core 0000:08:00.0: print_health_info:440:(pid 912): severity 3 (ERROR) mlx5_core 0000:08:00.0: print_health_info:441:(pid 912): irisc_index 9 mlx5_core 0000:08:00.0: print_health_info:442:(pid 912): synd 0x1: firmware internal error mlx5_core 0000:08:00.0: print_health_info:444:(pid 912): ext_synd 0x802b mlx5_core 0000:08:00.0: print_health_info:445:(pid 912): raw fw_ver 0x102001ec Signed-off-by: Aya Levin <ayal@nvidia.com> Reviewed-by: Moshe Shemesh <moshe@nvidia.com> Signed-off-by: Saeed Mahameed 
<saeedm@nvidia.com>
Diffstat (limited to 'include/linux/mlx5/device.h')
-rw-r--r--include/linux/mlx5/device.h14
1 files changed, 8 insertions, 6 deletions
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 347167c18802..f8a0bbb42c3b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -541,19 +541,21 @@ struct mlx5_cmd_layout {
u8 status_own;
};
-enum mlx5_fatal_assert_bit_offsets {
- MLX5_RFR_OFFSET = 31,
+enum mlx5_rfr_severity_bit_offsets {
+ MLX5_RFR_BIT_OFFSET = 0x7,
};
struct health_buffer {
- __be32 assert_var[5];
- __be32 rsvd0[3];
+ __be32 assert_var[6];
+ __be32 rsvd0[2];
__be32 assert_exit_ptr;
__be32 assert_callra;
- __be32 rsvd1[2];
+ __be32 rsvd1[1];
+ __be32 time;
__be32 fw_ver;
__be32 hw_id;
- __be32 rfr;
+ u8 rfr_severity;
+ u8 rsvd2[3];
u8 irisc_index;
u8 synd;
__be16 ext_synd;