diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_7.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 177 |
1 files changed, 74 insertions, 103 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 5d5d031c9e7d..a3ee3c4c650f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -57,13 +57,6 @@ static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst; } -static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, - uint32_t umc_inst, - uint32_t ch_inst) -{ - return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; -} - static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, uint64_t mc_umc_status, uint32_t umc_reg_offset) { @@ -71,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, uint64_t reg_value; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + dev_info(adev->dev, "Deferred error\n"); if (mc_umc_status) dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); @@ -167,29 +160,33 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev } } +static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + + umc_v6_7_ecc_info_query_correctable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ce_count)); + + umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ue_count)); + + return 0; +} + static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. Will add the protection */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_v6_7_ecc_info_query_correctable_error_count(adev, - umc_inst, ch_inst, - &(err_data->ce_count)); - umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, - umc_inst, ch_inst, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status); } -static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst) +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) { uint32_t channel_index; uint64_t soc_pa, retired_page, column; @@ -222,23 +219,23 @@ static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, } } -static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct ras_err_data *err_data = (struct ras_err_data *)data; eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) - return; + return 0; /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -250,25 +247,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, umc_v6_7_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst); } + + return 0; } static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. Will add the protection - * when firmware interface is ready */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_v6_7_ecc_info_query_error_address(adev, - err_data, - ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_ecc_info_query_error_address, ras_error_status); } static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, @@ -371,11 +358,14 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev } } -static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_addr; uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, @@ -409,58 +399,54 @@ static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, /* clear higher chip error count */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_7_CE_CNT_INIT); + + return 0; } static void umc_v6_7_reset_error_count(struct amdgpu_device *adev) { - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; + amdgpu_umc_loop_channels(adev, + umc_v6_7_reset_error_count_per_channel, NULL); +} - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); +static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); - umc_v6_7_reset_error_count_per_channel(adev, - umc_reg_offset); - } + umc_v6_7_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count), + ch_inst, umc_inst); + + umc_v6_7_querry_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + + return 0; } static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. Will add the protection */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - umc_v6_7_query_correctable_error_count(adev, - umc_reg_offset, - &(err_data->ce_count), - ch_inst, umc_inst); - umc_v6_7_querry_uncorrectable_error_count(adev, - umc_reg_offset, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_query_ecc_error_count, ras_error_status); umc_v6_7_reset_error_count(adev); } -static void umc_v6_7_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v6_7_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t mc_umc_status_addr; uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr; + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -470,12 +456,12 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) { /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); - return; + return 0; } /* calculate error address if ue error is detected */ @@ -491,29 +477,15 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; } static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. Will add the protection - * when firmware interface is ready */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - umc_v6_7_query_error_address(adev, - err_data, - umc_reg_offset, ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_query_error_address, ras_error_status); } static uint32_t umc_v6_7_query_ras_poison_mode_per_channel( @@ -553,5 +525,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = { .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, - .convert_ras_error_address = umc_v6_7_convert_error_address, }; |
