diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 556 |
1 files changed, 451 insertions, 105 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 491acdf92f73..0239114fb6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -21,6 +21,7 @@ */ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_gfx_v10.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" #include "athub/athub_2_0_0_offset.h" @@ -31,6 +32,7 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" +#include <uapi/linux/kfd_ioctl.h> enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -39,37 +41,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -81,33 +72,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, - unsigned int vmid) +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, + unsigned int vmid, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -150,22 +137,22 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -218,12 +205,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v10_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_compute_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; @@ -231,7 +217,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -296,24 +282,23 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t doorbell_off) + uint32_t doorbell_off, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; struct v10_compute_mqd *m; uint32_t mec, pipe; int r; m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -321,7 +306,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); @@ -348,17 +333,16 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, amdgpu_ring_commit(kiq_ring); out_unlock: - spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + spin_unlock(&adev->gfx.kiq[0].ring_lock); + release_queue(adev); return r; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -368,17 +352,17 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -386,10 +370,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -456,18 +439,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -488,15 +470,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -506,13 +488,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -529,12 +510,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; @@ -548,7 +528,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, int retry; #endif - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -560,6 +540,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; @@ -630,20 +613,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -680,11 +662,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); @@ -693,25 +674,10 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct kgd_dev *kgd) -{ - return 0; -} - -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -732,18 +698,9 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - return 0; -} - -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", vmid); @@ -754,6 +711,385 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +/* + * GFX10 helper for wave launch stall requirements on debug trap setting. + * + * vmid: + * Target VMID to stall/unstall. + * + * stall: + * 0-unstall wave launch (enable), 1-stall wave launch (disable). + * After wavefront launch has been stalled, allocated waves must drain from + * SPI in order for debug trap settings to take effect on those waves. + * This is roughly a ~3500 clock cycle wait on SPI where a read on + * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles. + * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required. + * + * NOTE: We can afford to clear the entire STALL_VMID field on unstall + * because current GFX10 chips cannot support multi-process debugging due to + * trap configuration and masking being limited to global scope. Always + * assume single process conditions. + * + */ + +#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110 +static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall) +{ + uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + int i; + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID, + stall ? 1 << vmid : 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + if (!stall) + return; + + for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++) + RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); +} + +uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + /* assume gfx off is disabled for the debug session if rlc restore not supported. */ + if (restore_dbg_registers) { + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, + VMID_SEL, 1 << vmid); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, + TRAP_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH; + + /* The SPI_GDBG_TRAP_MASK register is global and affects all + * processes. Only allow OR-ing the address-watch bit, since + * this only affects processes under the debugger. Other bits + * should stay 0 to avoid the debugger interfering with other + * processes. + */ + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR) + return -EINVAL; + + return 0; +} + +uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev) +{ + uint32_t data, wave_cntl_prev; + + mutex_lock(&adev->grbm_idx_mutex); + + wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK)); + *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN); + + trap_mask_bits = (trap_mask_bits & trap_mask_request) | + (*trap_mask_prev & ~trap_mask_request); + + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); + + /* We need to preserve wave launch mode stall settings. */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + uint32_t data = 0; + bool is_mode_set = !!wave_launch_mode; + + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + VMID_MASK, is_mode_set ? 1 << vmid : 0); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + MODE, is_mode_set ? wave_launch_mode : 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H) +#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H) +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid, + uint32_t inst) +{ + /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the + * same values. + */ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t tcp_watch_address_cntl; + uint32_t sq_watch_address_cntl; + + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + tcp_watch_address_cntl = 0; + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, + TCP_WATCH0_CNTL, + VMID, + debug_vmid); + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 7); + + sq_watch_address_cntl = 0; + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VMID, + debug_vmid); + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + MODE, + watch_mode); + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + MASK, + watch_address_mask >> 6); + + /* Turning off this watch point until we set all the registers */ + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 0); + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + tcp_watch_address_cntl); + + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VALID, + 0); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + sq_watch_address_cntl); + + /* Program {TCP,SQ}_WATCH?_ADDR* */ + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high); + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low); + + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_high); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_low); + + /* Enable the watch point */ + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + tcp_watch_address_cntl); + + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VALID, + 1); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + sq_watch_address_cntl); + + return 0; +} + +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_cntl); + + return 0; +} +#undef TCP_WATCH_STRIDE +#undef SQ_WATCH_STRIDE + + +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * The values read are: + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. + * gws_wait_time -- Wait Count for Global Wave Syncs. + * que_sleep_wait_time -- Wait Count for Dequeue Retry. + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. + */ +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times, + uint32_t inst) + +{ + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); +} + +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t sch_wave, + uint32_t que_sleep, + uint32_t *reg_offset, + uint32_t *reg_data) +{ + *reg_data = wait_times; + + if (sch_wave) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + sch_wave); + if (que_sleep) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + QUE_SLEEP, + que_sleep); + + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); +} + +static void program_trap_handler_settings(struct amdgpu_device *adev, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, + uint32_t inst) +{ + lock_srbm(adev, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + upper_32_bits(tba_addr >> 8) | + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(adev); +} + +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + +uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -767,11 +1103,21 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v10_set_address_watch, + .clear_address_watch = kgd_gfx_v10_clear_address_watch, + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, + .program_trap_handler_settings = program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell }; |
