diff options
author | Jani Nikula <jani.nikula@intel.com> | 2023-01-25 11:41:16 +0200 |
---|---|---|
committer | Jani Nikula <jani.nikula@intel.com> | 2023-01-25 11:41:16 +0200 |
commit | 902ecddc95c68efe71be733c57e8976948537926 (patch) | |
tree | cab01d5d8595a2fd554d1a18658e13e205294ce5 /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | |
parent | 81f66500f7c9e80c01bde8eb2cb78054051058e2 (diff) | |
parent | 68de345e101ce9a24e5c8849e69dd0dba2e8c9b2 (diff) |
Merge drm/drm-next into drm-intel-next
Backmerge to get the EDID handling changes.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 166 |
1 files changed, 129 insertions, 37 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 4b9e7b050ccd..4340d08f7607 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -29,13 +29,16 @@ #include "df/df_3_6_offset.h" #include "xgmi/xgmi_4_0_0_smn.h" #include "xgmi/xgmi_4_0_0_sh_mask.h" +#include "xgmi/xgmi_6_1_0_sh_mask.h" #include "wafl/wafl2_4_0_0_smn.h" #include "wafl/wafl2_4_0_0_sh_mask.h" #include "amdgpu_reset.h" #define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c +#define smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK 0x11a00218 #define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 +#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218 static DEFINE_MUTEX(xgmi_mutex); @@ -79,11 +82,27 @@ static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = { smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000 }; +static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[] = { + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x200000, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x300000, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x400000, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x500000, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x600000, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x700000 +}; + static const int walf_pcs_err_status_reg_aldebaran[] = { smnPCS_GOPX1_PCS_ERROR_STATUS, smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000 }; +static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = { + smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -162,6 +181,67 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = { SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)}, }; +static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { + {"XGMI3X16 PCS DataLossErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataLossErr)}, + {"XGMI3X16 PCS TrainingErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TrainingErr)}, + {"XGMI3X16 PCS FlowCtrlAckErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlAckErr)}, + {"XGMI3X16 PCS RxFifoUnderflowErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoUnderflowErr)}, + {"XGMI3X16 PCS RxFifoOverflowErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoOverflowErr)}, + {"XGMI3X16 PCS CRCErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, CRCErr)}, + {"XGMI3X16 PCS BERExceededErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, BERExceededErr)}, + {"XGMI3X16 PCS TxVcidDataErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxVcidDataErr)}, + {"XGMI3X16 PCS ReplayBufParityErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayBufParityErr)}, + {"XGMI3X16 PCS DataParityErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataParityErr)}, + {"XGMI3X16 PCS ReplayFifoOverflowErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)}, + {"XGMI3X16 PCS ReplayFifoUnderflowErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)}, + {"XGMI3X16 PCS ElasticFifoOverflowErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)}, + {"XGMI3X16 PCS DeskewErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DeskewErr)}, + {"XGMI3X16 PCS FlowCtrlCRCErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlCRCErr)}, + {"XGMI3X16 PCS DataStartupLimitErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataStartupLimitErr)}, + {"XGMI3X16 PCS FCInitTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FCInitTimeoutErr)}, + {"XGMI3X16 PCS RecoveryTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryTimeoutErr)}, + {"XGMI3X16 PCS ReadySerialTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)}, + {"XGMI3X16 PCS ReadySerialAttemptErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialAttemptErr)}, + {"XGMI3X16 PCS RecoveryAttemptErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryAttemptErr)}, + {"XGMI3X16 PCS RecoveryRelockAttemptErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)}, + {"XGMI3X16 PCS ReplayAttemptErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayAttemptErr)}, + {"XGMI3X16 PCS SyncHdrErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, SyncHdrErr)}, + {"XGMI3X16 PCS TxReplayTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxReplayTimeoutErr)}, + {"XGMI3X16 PCS RxReplayTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxReplayTimeoutErr)}, + {"XGMI3X16 PCS LinkSubTxTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubTxTimeoutErr)}, + {"XGMI3X16 PCS LinkSubRxTimeoutErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubRxTimeoutErr)}, + {"XGMI3X16 PCS RxCMDPktErr", + SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)}, +}; + /** * DOC: AMDGPU XGMI Support * @@ -809,39 +889,47 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, uint32_t value, + uint32_t mask_value, uint32_t *ue_count, uint32_t *ce_count, - bool is_xgmi_pcs) + bool is_xgmi_pcs, + bool check_mask) { int i; - int ue_cnt; + int ue_cnt = 0; + const struct amdgpu_pcs_ras_field *pcs_ras_fields = NULL; + uint32_t field_array_size = 0; if (is_xgmi_pcs) { - /* query xgmi pcs error status, - * only ue is supported */ - for (i = 0; i < ARRAY_SIZE(xgmi_pcs_ras_fields); i ++) { - ue_cnt = (value & - xgmi_pcs_ras_fields[i].pcs_err_mask) >> - xgmi_pcs_ras_fields[i].pcs_err_shift; - if (ue_cnt) { - dev_info(adev->dev, "%s detected\n", - xgmi_pcs_ras_fields[i].err_name); - *ue_count += ue_cnt; - } + if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) { + pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; + field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); + } else { + pcs_ras_fields = &xgmi_pcs_ras_fields[0]; + field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields); } } else { - /* query wafl pcs error status, - * only ue is supported */ - for (i = 0; i < ARRAY_SIZE(wafl_pcs_ras_fields); i++) { - ue_cnt = (value & - wafl_pcs_ras_fields[i].pcs_err_mask) >> - wafl_pcs_ras_fields[i].pcs_err_shift; - if (ue_cnt) { - dev_info(adev->dev, "%s detected\n", - wafl_pcs_ras_fields[i].err_name); - *ue_count += ue_cnt; - } + pcs_ras_fields = &wafl_pcs_ras_fields[0]; + field_array_size = ARRAY_SIZE(wafl_pcs_ras_fields); + } + + if (check_mask) + value = value & ~mask_value; + + /* query xgmi/walf pcs error status, + * only ue is supported */ + for (i = 0; value && i < field_array_size; i++) { + ue_cnt = (value & + pcs_ras_fields[i].pcs_err_mask) >> + pcs_ras_fields[i].pcs_err_shift; + if (ue_cnt) { + dev_info(adev->dev, "%s detected\n", + pcs_ras_fields[i].err_name); + *ue_count += ue_cnt; } + + /* reset bit value if the bit is checked */ + value &= ~(pcs_ras_fields[i].pcs_err_mask); } return 0; @@ -852,7 +940,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; int i; - uint32_t data; + uint32_t data, mask_data = 0; uint32_t ue_cnt = 0, ce_cnt = 0; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) @@ -867,15 +955,15 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++) { data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i]); if (data) - amdgpu_xgmi_query_pcs_error_status(adev, - data, &ue_cnt, &ce_cnt, true); + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, false); } /* check wafl pcs error */ for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct); i++) { data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i]); if (data) - amdgpu_xgmi_query_pcs_error_status(adev, - data, &ue_cnt, &ce_cnt, false); + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, false, false); } break; case CHIP_VEGA20: @@ -883,31 +971,35 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) { data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]); if (data) - amdgpu_xgmi_query_pcs_error_status(adev, - data, &ue_cnt, &ce_cnt, true); + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, false); } /* check wafl pcs error */ for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20); i++) { data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i]); if (data) - amdgpu_xgmi_query_pcs_error_status(adev, - data, &ue_cnt, &ce_cnt, false); + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, false, false); } break; case CHIP_ALDEBARAN: /* check xgmi3x16 pcs error */ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) { data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]); + mask_data = + RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[i]); if (data) - amdgpu_xgmi_query_pcs_error_status(adev, - data, &ue_cnt, &ce_cnt, true); + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, true); } /* check wafl pcs error */ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) { data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]); + mask_data = + RREG32_PCIE(walf_pcs_err_noncorrectable_mask_reg_aldebaran[i]); if (data) - amdgpu_xgmi_query_pcs_error_status(adev, - data, &ue_cnt, &ce_cnt, false); + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, false, true); } break; default: |