diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 20 |
1 files changed, 7 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 5fd67e1cc2a0..9c07014d9bd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -238,19 +238,15 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; - int locked; /* block amdgpu_gpu_recover till msg FLR COMPLETE received, * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. - * - * we can unlock the lock_reset to allow "amdgpu_job_timedout" - * to run gpu_recover() after FLR_NOTIFICATION_CMPL received - * which means host side had finished this VF's FLR. */ - locked = mutex_trylock(&adev->lock_reset); - if (locked) - adev->in_gpu_reset = true; + if (!down_read_trylock(&adev->reset_sem)) + return; + + atomic_set(&adev->in_gpu_reset, 1); do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) @@ -261,14 +257,12 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - if (locked) { - adev->in_gpu_reset = false; - mutex_unlock(&adev->lock_reset); - } + atomic_set(&adev->in_gpu_reset, 0); + up_read(&adev->reset_sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT) + && (amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) amdgpu_device_gpu_recover(adev, NULL); } |