diff options
author | Philip Yang <Philip.Yang@amd.com> | 2022-01-13 21:24:20 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2022-06-30 15:31:14 -0400 |
commit | c7f21978fa6aafaf7ad37155c7d3a217dc3d16b0 (patch) | |
tree | f4445b8613bde7f5aeb7e7097ab8269487333957 /drivers/gpu/drm/amd/amdkfd/kfd_process.c | |
parent | acac270d09828edda2d530d255ee75ceb87583ec (diff) |
drm/amdkfd: Add user queue eviction restore SMI event
Output user queue eviction and restore event. User queue eviction may be
triggered by svm or userptr MMU notifier, TTM eviction, device suspend
and CRIU checkpoint and restore.
User queue restore may be rescheduled if eviction happens again while
restore.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a13e60d48b73..fc38a4d81420 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -43,6 +43,7 @@ struct mm_struct; #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" #include "kfd_svm.h" +#include "kfd_smi_events.h" /* * List of struct kfd_process (field kfd_process). @@ -1736,7 +1737,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. */ -int kfd_process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) { int r = 0; int i; @@ -1745,6 +1746,9 @@ int kfd_process_evict_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, + trigger); + r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, &pdd->qpd); /* evict return -EIO if HWS is hang or asic is resetting, in this case @@ -1769,6 +1773,9 @@ fail: if (n_evicted == 0) break; + + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd)) pr_err("Failed to restore queues\n"); @@ -1788,6 +1795,8 @@ int kfd_process_restore_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); if (r) { @@ -1849,7 +1858,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid 0x%x\n", p->pasid); - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -1916,7 +1925,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (kfd_process_evict_queues(p)) + if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) pr_err("Failed to suspend process 0x%x\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); |