1 file changed, 50 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index d889e3545120..ba99e0f258ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -25,6 +25,7 @@
 #include "kfd_topology.h"
 #include <linux/file.h>
 #include <uapi/linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_sysfs.h>
 
 #define MAX_WATCH_ADDRESSES	4
 
@@ -103,7 +104,8 @@ void debug_event_write_work_handler(struct work_struct *work)
 			struct kfd_process,
 			debug_event_workarea);
 
-	kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+	if (process->debug_trap_enabled && process->dbg_ev_file)
+		kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
 }
 
 /* update process/device/queue exception status, write to descriptor
@@ -202,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
 				   size_t exception_data_size)
 {
 	struct kfd_process *p;
+	struct kfd_process_device *pdd = NULL;
 	bool signaled_to_debugger_or_runtime = false;
 
-	p = kfd_lookup_process_by_pasid(pasid);
+	p = kfd_lookup_process_by_pasid(pasid, &pdd);
 
-	if (!p)
+	if (!pdd)
 		return false;
 
 	if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -236,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
 
 			mutex_unlock(&p->mutex);
 		} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
-			kfd_dqm_evict_pasid(dev->dqm, p->pasid);
-			kfd_signal_vm_fault_event(dev, p->pasid, NULL,
-							exception_data);
+			kfd_evict_process_device(pdd);
+			kfd_signal_vm_fault_event(pdd, NULL, exception_data);
 
 			signaled_to_debugger_or_runtime = true;
 		}
@@ -274,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
 		data = (struct kfd_hsa_memory_exception_data *)
 						pdd->vm_fault_exc_data;
 
-		kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
-		kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+		kfd_evict_process_device(pdd);
+		kfd_signal_vm_fault_event(pdd, NULL, data);
 		error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
 	}
 
@@ -348,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
 {
 	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
 	uint32_t flags = pdd->process->dbg_flags;
+	struct amdgpu_device *adev = pdd->dev->adev;
+	int r;
 
 	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
 		return 0;
 
+	if (!pdd->proc_ctx_cpu_ptr) {
+		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+			AMDGPU_MES_PROC_CTX_SIZE,
+			&pdd->proc_ctx_bo,
+			&pdd->proc_ctx_gpu_addr,
+			&pdd->proc_ctx_cpu_ptr,
+			false);
+		if (r) {
+			dev_err(adev->dev,
+			"failed to allocate process context bo\n");
+			return r;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
 						pdd->watch_points, flags, sq_trap_en);
 }
@@ -363,47 +382,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i
 
 	*watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
 
-	spin_lock(&pdd->dev->kfd->watch_points_lock);
+	spin_lock(&pdd->dev->watch_points_lock);
 
 	for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
 		/* device watchpoint in use so skip */
-		if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
+		if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
 			continue;
 
 		pdd->alloc_watch_ids |= 0x1 << i;
-		pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
+		pdd->dev->alloc_watch_ids |= 0x1 << i;
 		*watch_id = i;
-		spin_unlock(&pdd->dev->kfd->watch_points_lock);
+		spin_unlock(&pdd->dev->watch_points_lock);
 		return 0;
 	}
 
-	spin_unlock(&pdd->dev->kfd->watch_points_lock);
+	spin_unlock(&pdd->dev->watch_points_lock);
 
 	return -ENOMEM;
 }
 
 static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
 {
-	spin_lock(&pdd->dev->kfd->watch_points_lock);
+	spin_lock(&pdd->dev->watch_points_lock);
 
 	/* process owns device watch point so safe to clear */
 	if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
 		pdd->alloc_watch_ids &= ~(0x1 << watch_id);
-		pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
+		pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
 	}
 
-	spin_unlock(&pdd->dev->kfd->watch_points_lock);
+	spin_unlock(&pdd->dev->watch_points_lock);
 }
 
 static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
 {
 	bool owns_watch_id = false;
 
-	spin_lock(&pdd->dev->kfd->watch_points_lock);
+	spin_lock(&pdd->dev->watch_points_lock);
 	owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
 			((pdd->alloc_watch_ids >> watch_id) & 0x1);
 
-	spin_unlock(&pdd->dev->kfd->watch_points_lock);
+	spin_unlock(&pdd->dev->watch_points_lock);
 
 	return owns_watch_id;
 }
@@ -497,14 +516,24 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
 	int i, r = 0, rewind_count = 0;
 
 	for (i = 0; i < target->n_pdds; i++) {
-		if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) &&
+		struct kfd_topology_device *topo_dev =
+				kfd_topology_device_by_id(target->pdds[i]->dev->id);
+		uint32_t caps = topo_dev->node_props.capability;
+
+		if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
 			(*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
 			*flags = prev_flags;
 			return -EACCES;
 		}
+
+		if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
+		    (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) {
+			*flags = prev_flags;
+			return -EACCES;
+		}
 	}
 
-	target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
+	target->dbg_flags = *flags;
 	*flags = prev_flags;
 	for (i = 0; i < target->n_pdds; i++) {
 		struct kfd_process_device *pdd = target->pdds[i];
@@ -645,6 +674,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
 	else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
 		target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
 
+	cancel_work_sync(&target->debug_event_workarea);
 	fput(target->dbg_ev_file);
 	target->dbg_ev_file = NULL;