From 8544374c0f82edb285779f21b149826fe2c2977c Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Mon, 13 Jan 2025 17:35:59 -0600 Subject: drm/amdkfd: Have kfd driver use same PASID values from graphic driver Current kfd driver has its own PASID value for a kfd process and uses it to locate vm at interrupt handler or mapping between kfd process and vm. That design is not working when a physical gpu device has multiple spatial partitions, ex: adev in CPX mode. This patch has kfd driver use same pasid values that graphic driver generated which is per vm per pasid. These pasid values are passed to fw/hardware. We do not need change interrupt handler though more pasid values are used. Also, pasid values at log are replaced by user process pid; pasid values are not exposed to user. Users see their process pids that have meaning in user space. Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index a8abc3091801..12456c61ffa5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -204,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev, size_t exception_data_size) { struct kfd_process *p; + struct kfd_process_device *pdd = NULL; bool signaled_to_debugger_or_runtime = false; - p = kfd_lookup_process_by_pasid(pasid); + p = kfd_lookup_process_by_pasid(pasid, &pdd); - if (!p) + if (!pdd) return false; if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true, @@ -238,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev, mutex_unlock(&p->mutex); } else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) { - kfd_dqm_evict_pasid(dev->dqm, p->pasid); - kfd_signal_vm_fault_event(dev, p->pasid, NULL, - exception_data); + kfd_evict_process_device(pdd); + kfd_signal_vm_fault_event(pdd, NULL, exception_data); signaled_to_debugger_or_runtime = true; } @@ -276,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p, data = (struct kfd_hsa_memory_exception_data *) pdd->vm_fault_exc_data; - kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid); - kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data); + kfd_evict_process_device(pdd); + kfd_signal_vm_fault_event(pdd, NULL, data); error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION); } -- cgit