diff options
author | Haoyue Xu <xuhaoyue1@hisilicon.com> | 2022-07-14 21:43:53 +0800 |
---|---|---|
committer | Leon Romanovsky <leonro@nvidia.com> | 2022-07-18 14:16:40 +0300 |
commit | 2de949abd6a539fac4b2c89a560e4ae505b6fb52 (patch) | |
tree | 750729a7df65e78b5b811272790fb8616aca9643 /drivers/infiniband/hw/hns/hns_roce_hw_v2.h | |
parent | 75e4e716f7089558fda4ddc660fa8dbdec4eb1d3 (diff) |
RDMA/hns: Recover 1bit-ECC error of RAM on chip
Since ECC memory maintains a memory system immune to single-bit errors,
add support for correcting the 1bit-ECC error, which prevents a 1bit-ECC
error become an uncorrected type error. When a 1bit-ECC error happens in
the internal ram of the ROCE engine, such as the QPC table, as a 1bit-ECC
error caused by reading, the ROCE engine only corrects those 1bit ECC
errors by writing.
Link: https://lore.kernel.org/r/20220714134353.16700-6-liangwenpeng@huawei.com
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_hw_v2.h')
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e6186149ef19..f96debac30fe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -250,6 +250,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_ROCE_OPC_EXT_CFG = 0x8512, + HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -1107,6 +1108,11 @@ enum { #define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32) #define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64) +/* Fields of HNS_ROCE_QUERY_RAM_ECC */ +#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0) +#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32) +#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64) + struct hns_roce_cfg_sgid_tb { __le32 table_idx_rsv; __le32 vf_sgid_l; @@ -1343,6 +1349,12 @@ struct hns_roce_dip { struct list_head node; /* all dips are on a list */ }; +struct fmea_ram_ecc { + u32 is_ecc_err; + u32 res_type; + u32 index; +}; + /* only for RNR timeout issue of HIP08 */ #define HNS_ROCE_CLOCK_ADJUST 1000 #define HNS_ROCE_MAX_CQ_PERIOD 65 |