diff options
21 files changed, 152 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 93413be59e08..177f04491a11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -161,10 +161,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) r = amdgpu_ring_reset(ring, job->vmid, NULL); if (!r) { - if (is_guilty) { + if (is_guilty) atomic_inc(&ring->adev->gpu_reset_counter); - amdgpu_fence_driver_force_completion(ring); - } drm_sched_wqueue_start(&ring->sched); dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 4b2af95203b2..4d0ee3ffe985 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9577,7 +9577,11 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, @@ -9650,7 +9654,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8d5c0ab016d2..39f4dd18c277 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6842,7 +6842,11 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -7004,7 +7008,11 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 795bd353a9ce..964fa3f2e271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5337,7 +5337,11 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -5452,7 +5456,11 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d91e0423c482..95e319974f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7242,7 +7242,12 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, DRM_ERROR("fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 272f0f8e41d4..8bfee17a826e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3620,7 +3620,12 @@ pipe_reset: dev_err(adev->dev, "fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } enum amdgpu_gfx_cp_ras_mem_id { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 93eb71d2ce30..6621a7b1f29f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -768,9 +768,15 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + jpeg_v2_0_stop(ring->adev); jpeg_v2_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 45b8702d20ad..44a5c0e82ca4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -647,9 +647,15 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 9bd0ae4a24a1..e813af4eedd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -559,9 +559,15 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + jpeg_v3_0_stop(ring->adev); jpeg_v3_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 4fc1ef85b294..190f0742d701 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -724,12 +724,18 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + if (amdgpu_sriov_vf(ring->adev)) return -EINVAL; jpeg_v4_0_stop(ring->adev); jpeg_v4_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index bdc7612ba56a..04755b7a62d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1147,12 +1147,18 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; jpeg_v4_0_3_core_stall_reset(ring); jpeg_v4_0_3_start_jrbc(ring); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index aee4f50a0f52..e7f942dc714a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -838,12 +838,18 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index b8828432c98a..c13ae87d4e64 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1679,6 +1679,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + bool is_guilty = ring->funcs->is_guilty(ring); struct amdgpu_device *adev = ring->adev; u32 id = ring->me; int r; @@ -1689,8 +1690,13 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, amdgpu_amdkfd_suspend(adev, true); r = amdgpu_sdma_reset_engine(adev, id); amdgpu_amdkfd_resume(adev, true); + if (r) + return r; - return r; + if (is_guilty) + amdgpu_fence_driver_force_completion(ring); + + return 0; } static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) @@ -1734,8 +1740,8 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 inst_mask; - int i; + u32 inst_mask, tmp_mask; + int i, r; inst_mask = 1 << ring->me; udelay(50); @@ -1752,7 +1758,24 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) return -ETIMEDOUT; } - return sdma_v4_4_2_inst_start(adev, inst_mask, true); + r = sdma_v4_4_2_inst_start(adev, inst_mask, true); + if (r) + return r; + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + ring = &adev->sdma.instance[i].ring; + + amdgpu_fence_driver_force_completion(ring); + + if (adev->sdma.has_page_queue) { + struct amdgpu_ring *page = &adev->sdma.instance[i].page; + + amdgpu_fence_driver_force_completion(page); + } + } + + return r; } static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 2fd72c85cf01..4d72b085b3dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1618,7 +1618,10 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return r; + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 21421f1bd209..42a25150f83a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1534,7 +1534,10 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return r; + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 2455965387e0..c6cb7ff15caa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1574,7 +1574,11 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, if (r) return r; - return sdma_v6_0_gfx_resume_instance(adev, i, true); + r = sdma_v6_0_gfx_resume_instance(adev, i, true); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 6210be7e4a52..b00c63812899 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -826,7 +826,11 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, if (r) return r; - return sdma_v7_0_gfx_resume_instance(adev, i, true); + r = sdma_v7_0_gfx_resume_instance(adev, i, true); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 244359fa4aac..6c25e9fc4f0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1973,6 +1973,7 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; @@ -1980,7 +1981,11 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, vcn_v4_0_stop(vinst); vcn_v4_0_start(vinst); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 75c07ebf7fe4..1e1dd61b774e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1623,8 +1623,10 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, vcn_v4_0_3_hw_init_inst(vinst); vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); r = amdgpu_ring_test_helper(ring); - - return r; + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 731f7762c3e0..9c02446bb1a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1471,6 +1471,7 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; @@ -1478,7 +1479,11 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, vcn_v4_0_5_stop(vinst); vcn_v4_0_5_start(vinst); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index f975994b3ff4..c8924f97cf58 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1198,6 +1198,7 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; @@ -1205,7 +1206,11 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, vcn_v5_0_0_stop(vinst); vcn_v5_0_0_start(vinst); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { |