summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c556
1 files changed, 451 insertions, 105 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 491acdf92f73..0239114fb6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -21,6 +21,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
@@ -31,6 +32,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -39,37 +41,26 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-{
- return (struct amdgpu_device *)kgd;
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
-static void unlock_srbm(struct kgd_dev *kgd)
+static void unlock_srbm(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, queue_id, 0);
+ lock_srbm(adev, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
@@ -81,33 +72,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct kgd_dev *kgd)
+static void release_queue(struct amdgpu_device *adev)
{
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
- lock_srbm(kgd, 0, 0, 0, vmid);
+ lock_srbm(adev, 0, 0, 0, vmid);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(kgd);
+ unlock_srbm(adev);
}
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
- unsigned int vmid)
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
@@ -150,22 +137,22 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
* but still works
*/
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(kgd, mec, pipe, 0, 0);
+ lock_srbm(adev, mec, pipe, 0, 0);
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(kgd);
+ unlock_srbm(adev);
return 0;
}
@@ -218,12 +205,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v10_sdma_mqd *)mqd;
}
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
@@ -231,7 +217,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
@@ -296,24 +282,23 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -321,7 +306,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -348,17 +333,16 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(kgd);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
return r;
}
-static int kgd_hqd_dump(struct kgd_dev *kgd,
+static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -368,17 +352,17 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(kgd);
+ release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -386,10 +370,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
@@ -456,18 +439,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -488,15 +470,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return 0;
}
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
@@ -506,13 +488,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(kgd);
+ release_queue(adev);
return retval;
}
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
@@ -529,12 +510,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
@@ -548,7 +528,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
#endif
- acquire_queue(kgd, pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
@@ -560,6 +540,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
default:
type = DRAIN_PIPE;
break;
@@ -630,20 +613,19 @@ loop:
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
+ release_queue(adev);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(kgd);
+ release_queue(adev);
return 0;
}
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
unsigned int utimeout)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
@@ -680,11 +662,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
-static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
@@ -693,25 +674,10 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
@@ -732,18 +698,9 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
return 0;
}
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
{
- return 0;
-}
-
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
-{
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
@@ -754,6 +711,385 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
+/*
+ * GFX10 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~3500 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because current GFX10 chips cannot support multi-process debugging due to
+ * trap configuration and masking being limited to global scope. Always
+ * assume single process conditions.
+ *
+ */
+
+#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
+static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
+{
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ int i;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+ if (restore_dbg_registers) {
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, 1 << vmid);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+ * same values.
+ */
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t tcp_watch_address_cntl;
+ uint32_t sq_watch_address_cntl;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ tcp_watch_address_cntl = 0;
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ sq_watch_address_cntl = 0;
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ /* Program {TCP,SQ}_WATCH?_ADDR* */
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
+
+
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+static void program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
+
+ unlock_srbm(adev);
+}
+
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -767,11 +1103,21 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
+ .program_trap_handler_settings = program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};