summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process.c
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2022-01-13 21:24:20 -0500
committerAlex Deucher <alexander.deucher@amd.com>2022-06-30 15:31:14 -0400
commitc7f21978fa6aafaf7ad37155c7d3a217dc3d16b0 (patch)
treef4445b8613bde7f5aeb7e7097ab8269487333957 /drivers/gpu/drm/amd/amdkfd/kfd_process.c
parentacac270d09828edda2d530d255ee75ceb87583ec (diff)
drm/amdkfd: Add user queue eviction restore SMI event
Output user queue eviction and restore event. User queue eviction may be triggered by svm or userptr MMU notifier, TTM eviction, device suspend and CRIU checkpoint and restore. User queue restore may be rescheduled if eviction happens again while restore. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c15
1 files changed, 12 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a13e60d48b73..fc38a4d81420 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -43,6 +43,7 @@ struct mm_struct;
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_smi_events.h"
/*
* List of struct kfd_process (field kfd_process).
@@ -1736,7 +1737,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-int kfd_process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
{
int r = 0;
int i;
@@ -1745,6 +1746,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+ trigger);
+
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
/* evict return -EIO if HWS is hang or asic is resetting, in this case
@@ -1769,6 +1773,9 @@ fail:
if (n_evicted == 0)
break;
+
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
pr_err("Failed to restore queues\n");
@@ -1788,6 +1795,8 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
@@ -1849,7 +1858,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid 0x%x\n", p->pasid);
- ret = kfd_process_evict_queues(p);
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -1916,7 +1925,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- if (kfd_process_evict_queues(p))
+ if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
pr_err("Failed to suspend process 0x%x\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);