From 9e612c11a758087a1acbc1cc1d84f2deaf496b34 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 13 Nov 2019 22:26:22 +0800 Subject: drm/amdgpu: init umc functions for arcturus umc ras reuse vg20 umc functions for arcturus umc ras Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9f2a893871ec..ee615d050837 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -639,6 +639,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.funcs = &umc_v6_0_funcs; break; case CHIP_VEGA20: + case CHIP_ARCTURUS: adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; @@ -752,6 +753,7 @@ static int gmc_v9_0_late_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA20: + case CHIP_ARCTURUS: r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); -- cgit From f6c3623b7b2f378f6210a6a5c898d1b36c722e6f Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Tue, 19 Nov 2019 14:02:57 +0800 Subject: drm/amdgpu: implement querying ras error count for mmhub9.4 Get mmhub error counter by accessing EDC_CNT registers. v2: Add mmhub_v9_4_ prefix for local static variable and function Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ee615d050837..5f4a6cdf83a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -658,6 +658,9 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) case CHIP_VEGA20: adev->mmhub.funcs = &mmhub_v1_0_funcs; break; + case CHIP_ARCTURUS: + adev->mmhub.funcs = &mmhub_v9_4_funcs; + break; default: break; } -- cgit From 4ed8a03740d0ce092563c8fcb76d2c28da4675cd Mon Sep 17 00:00:00 2001 From: changzhu Date: Tue, 19 Nov 2019 11:13:29 +0800 Subject: drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10 It may lose gpuvm invalidate acknowldege state across power-gating off cycle. To avoid this issue in gmc9/gmc10 invalidation, add semaphore acquire before invalidation and semaphore release after invalidation. After adding semaphore acquire before invalidation, the semaphore register become read-only if another process try to acquire semaphore. Then it will not be able to release this semaphore. Then it may cause deadlock problem. If this deadlock problem happens, it needs a semaphore firmware fix. Signed-off-by: changzhu Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5f4a6cdf83a7..365a88ecdec4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -459,6 +459,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -474,7 +497,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, break; udelay(1); } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) return; @@ -489,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -499,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } -- cgit From 418899d6157516fb0b7acad5e37653e60b285852 Mon Sep 17 00:00:00 2001 From: changzhu Date: Tue, 10 Dec 2019 10:23:09 +0800 Subject: drm/amdgpu: avoid using invalidate semaphore for picasso It may cause timeout waiting for sem acquire in VM flush when using invalidate semaphore for picasso. So it needs to avoid using invalidate semaphore for piasso. Signed-off-by: changzhu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 365a88ecdec4..a208b2883c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -468,8 +468,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) { + if ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acuqire */ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); @@ -499,8 +502,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) + if ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))) /* * add semaphore release after invalidation, * write with 0 means semaphore release @@ -531,8 +537,11 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if ((ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_sem + eng, 0x1, 0x1); @@ -548,8 +557,11 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if ((ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))) /* * add semaphore release after invalidation, * write with 0 means semaphore release -- cgit From 4cf781c24c3bc8cc50f8013143aa20b26e9217e8 Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 11 Dec 2019 10:18:55 +0800 Subject: drm/amdgpu: Added RAS UMC error query support for Arcturus Updated UMC 6.1 function set to support UMC 6.1.1 and 6.1.2 devices Reviewed-by: Alex Deucher Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index a208b2883c03..53dfc82ca171 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -708,11 +708,18 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.funcs = &umc_v6_0_funcs; break; case CHIP_VEGA20: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; case CHIP_ARCTURUS: adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; - adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.funcs = &umc_v6_1_funcs; break; -- cgit