summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2019-08-13 15:46:03 +0800
committerAlex Deucher <alexander.deucher@amd.com>2019-08-15 10:51:50 -0500
commitd6e0cbb152b35833a26772c86b338d8297ce609d (patch)
treebadd7cbe79e7247e0ba795db8d2775506c35ffa6 /drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
parentf0f50dcfd427f9f4a179048f4b56bff176bacc94 (diff)
drm/amdgpu: implement querying ras error count for mmhub
get mmhub ea ras error count by accessing EDC_CNT register Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c55
1 files changed, 55 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index c3a98d964ce5..2bd7ada80088 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -21,11 +21,13 @@
*
*/
#include "amdgpu.h"
+#include "amdgpu_ras.h"
#include "mmhub_v1_0.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
#include "mmhub/mmhub_1_0_default.h"
+#include "mmhub/mmhub_9_4_0_offset.h"
#include "vega10_enum.h"
#include "soc15_common.h"
@@ -33,6 +35,9 @@
#define mmDAGB0_CNTL_MISC2_RV 0x008f
#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
+#define EA_EDC_CNT_MASK 0x3
+#define EA_EDC_CNT_SHIFT 0x2
+
u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
@@ -557,6 +562,56 @@ void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
+ int i;
+ uint32_t ea0_edc_cnt, ea0_edc_cnt2;
+ uint32_t ea1_edc_cnt, ea1_edc_cnt2;
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ /* EDC CNT will be cleared automatically after read */
+ ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20);
+ ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20);
+ ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20);
+ ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20);
+
+ /* error count of each error type is recorded by 2 bits,
+ * ce and ue count in EDC_CNT
+ */
+ for (i = 0; i < 5; i++) {
+ err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
+ err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
+ ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
+ err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
+ ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ }
+ /* successive ue count in EDC_CNT */
+ for (i = 0; i < 5; i++) {
+ err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
+ err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
+ ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ }
+
+ /* ce and ue count in EDC_CNT2 */
+ for (i = 0; i < 3; i++) {
+ err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
+ err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
+ ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
+ ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
+ err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
+ err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
+ ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
+ ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
+ }
+ /* successive ue count in EDC_CNT2 */
+ for (i = 0; i < 6; i++) {
+ err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
+ err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
+ ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
+ ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
+ }
}
const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {