summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJay Cornwall <jay.cornwall@amd.com>2025-01-16 14:36:39 -0600
committerAlex Deucher <alexander.deucher@amd.com>2025-01-28 16:22:02 -0500
commitf214b7beb00621b983e67ce97477afc3ab4b38f4 (patch)
treeccf76e65bf797071757f56d754ff8eed3790cadd
parent64179a1416e1420a34226ab3beb5f84710953d16 (diff)
drm/amdkfd: Block per-queue reset when halt_if_hws_hang=1
The purpose of halt_if_hws_hang is to preserve GPU state for driver debugging when queue preemption fails. Issuing per-queue reset may kill wavefronts which caused the preemption failure. Signed-off-by: Jay Cornwall <jay.cornwall@amd.com> Reviewed-by: Jonathan Kim <Jonathan.Kim@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org # 6.12.x
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c4
1 files changed, 2 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1405e8affd48..d4593374e7a1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2325,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
*/
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) {
+ while (halt_if_hws_hang)
+ schedule();
if (reset_queues_on_hws_hang(dqm)) {
- while (halt_if_hws_hang)
- schedule();
dqm->is_hws_hang = true;
kfd_hws_hang(dqm);
retval = -ETIME;