summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process.c
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2022-03-25 14:55:30 -0400
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 12:34:48 -0400
commit0ab2d7532b05a3e7c06fd3b0c8bd6b46c1dfb508 (patch)
tree01e6a3ae6c0a8b8e3b0169213bdc43431c5fb1f4 /drivers/gpu/drm/amd/amdkfd/kfd_process.c
parentd230f1bfe7a1977565ce1e2804ddb7b7a3d911ff (diff)
drm/amdkfd: prepare per-process debug enable and disable
The ROCm debugger will attach to a process to debug by PTRACE and will expect the KFD to prepare a process for the target PID, whether the target PID has opened the KFD device or not. This patch is to explicity handle this requirement. Further HW mode setting and runtime coordination requirements will be handled in following patches. In the case where the target process has not opened the KFD device, a new KFD process must be created for the target PID. The debugger as well as the target process for this case will have not acquired any VMs so handle process restoration to correctly account for this. To coordinate with HSA runtime, the debugger must be aware of the target process' runtime enablement status and will copy the runtime status information into the debugged KFD process for later query. On enablement, the debugger will subscribe to a set of exceptions where each exception events will notify the debugger through a pollable FIFO file descriptor that the debugger provides to the KFD to manage. Finally on process termination of either the debugger or the target, debugging must be disabled if it has not been done so. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c60
1 files changed, 40 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7f7d1378a2f8..ef67f5e37c6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -44,6 +44,7 @@ struct mm_struct;
#include "kfd_iommu.h"
#include "kfd_svm.h"
#include "kfd_smi_events.h"
+#include "kfd_debug.h"
/*
* List of struct kfd_process (field kfd_process).
@@ -69,7 +70,6 @@ static struct kfd_process *find_process(const struct task_struct *thread,
bool ref);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
-static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);
@@ -798,18 +798,19 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
}
-struct kfd_process *kfd_create_process(struct file *filep)
+struct kfd_process *kfd_create_process(struct task_struct *thread)
{
struct kfd_process *process;
- struct task_struct *thread = current;
int ret;
- if (!thread->mm)
+ if (!(thread->mm && mmget_not_zero(thread->mm)))
return ERR_PTR(-EINVAL);
/* Only the pthreads threading model is supported. */
- if (thread->group_leader->mm != thread->mm)
+ if (thread->group_leader->mm != thread->mm) {
+ mmput(thread->mm);
return ERR_PTR(-EINVAL);
+ }
/*
* take kfd processes mutex before starting of process creation
@@ -833,10 +834,6 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (IS_ERR(process))
goto out;
- ret = kfd_process_init_cwsr_apu(process, filep);
- if (ret)
- goto out_destroy;
-
if (!procfs.kobj)
goto out;
@@ -870,16 +867,9 @@ out:
if (!IS_ERR(process))
kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
+ mmput(thread->mm);
return process;
-
-out_destroy:
- hash_del_rcu(&process->kfd_processes);
- mutex_unlock(&kfd_processes_mutex);
- synchronize_srcu(&kfd_processes_srcu);
- /* kfd_process_free_notifier will trigger the cleanup */
- mmu_notifier_put(&process->mmu_notifier);
- return ERR_PTR(ret);
}
struct kfd_process *kfd_get_process(const struct task_struct *thread)
@@ -1180,6 +1170,25 @@ static void kfd_process_notifier_release_internal(struct kfd_process *p)
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
+ kfd_dbg_trap_disable(p);
+
+ if (atomic_read(&p->debugged_process_count) > 0) {
+ struct kfd_process *target;
+ unsigned int temp;
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ hash_for_each_rcu(kfd_processes_table, temp, target, kfd_processes) {
+ if (target->debugger_process && target->debugger_process == p) {
+ mutex_lock_nested(&target->mutex, 1);
+ kfd_dbg_trap_disable(target);
+ mutex_unlock(&target->mutex);
+ if (atomic_read(&p->debugged_process_count) == 0)
+ break;
+ }
+ }
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+ }
mmu_notifier_put(&p->mmu_notifier);
}
@@ -1259,11 +1268,14 @@ void kfd_cleanup_processes(void)
mmu_notifier_synchronize();
}
-static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
unsigned long offset;
int i;
+ if (p->has_cwsr)
+ return 0;
+
for (i = 0; i < p->n_pdds; i++) {
struct kfd_node *dev = p->pdds[i]->dev;
struct qcm_process_device *qpd = &p->pdds[i]->qpd;
@@ -1292,6 +1304,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
}
+ p->has_cwsr = true;
+
return 0;
}
@@ -1434,6 +1448,10 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err)
goto err_event_init;
process->is_32bit_user_mode = in_compat_syscall();
+ process->debug_trap_enabled = false;
+ process->debugger_process = NULL;
+ process->exception_enable_mask = 0;
+ atomic_set(&process->debugged_process_count, 0);
process->pasid = kfd_pasid_alloc();
if (process->pasid == 0) {
@@ -1967,8 +1985,10 @@ static void restore_process_worker(struct work_struct *work)
*/
p->last_restore_timestamp = get_jiffies_64();
- ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
- &p->ef);
+ /* VMs may not have been acquired yet during debugging. */
+ if (p->kgd_process_info)
+ ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
+ &p->ef);
if (ret) {
pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
p->pasid, PROCESS_BACK_OFF_TIME_MS);