Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c')
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c (-rw-r--r--) | 2581
 1 file changed, 1797 insertions(+), 784 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index a9da0486467a..8a2ee2de390f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -29,7 +29,6 @@ #include "amdgpu_gfx.h" #include "amdgpu_psp.h" #include "amdgpu_smu.h" -#include "amdgpu_atomfirmware.h" #include "imu_v11_0.h" #include "soc21.h" #include "nvd.h" @@ -42,13 +41,15 @@ #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" #include "soc15.h" -#include "soc15d.h" #include "clearstate_gfx11.h" #include "v11_structs.h" #include "gfx_v11_0.h" +#include "gfx_v11_0_cleaner_shader.h" #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -60,11 +61,32 @@ #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 +#define regPC_CONFIG_CNTL_1 0x194d +#define regPC_CONFIG_CNTL_1_BASE_IDX 1 + +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); @@ -82,6 +104,200 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); + +static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), + 
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), + SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { + /* compute registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + 
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), +}; + +static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { + /* gfx queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), + 
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), +}; + +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -112,7 +328,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); @@ -123,20 +339,27 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, uint16_t pasid, uint32_t flush_type, bool all_hub, uint8_t dst_sel); -static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); -static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, bool enable); static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* 
cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -192,7 +415,7 @@ static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, struct amdgpu_device *adev = kiq_ring->adev; uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; - if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) { + if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); return; } @@ -260,12 +483,15 @@ static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs; + adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; } static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): soc15_program_register_sequence(adev, @@ -275,6 +501,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, @@ -310,6 +540,21 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, amdgpu_ring_write(ring, inv); /* poll interval */ } +static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + amdgpu_ring_insert_nop(ring, num_nop - 1); +} + static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -358,7 +603,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; /* MES KIQ fw hasn't indirect buffer support for now */ @@ -368,33 +613,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto 
err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -421,12 +651,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -444,10 +672,10 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char * { const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; - char fw_name[40]; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -463,10 +691,30 @@ out: return err; } +static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + if ((adev->gfx.me_fw_version >= 1505) && + (adev->gfx.pfp_fw_version >= 1600) && + (adev->gfx.mec_fw_version >= 512)) { + if (amdgpu_sriov_vf(adev)) + adev->gfx.cp_gfx_shadow = true; + else + adev->gfx.cp_gfx_shadow = false; + } + break; + default: + adev->gfx.cp_gfx_shadow = false; + break; + } +} + static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; - char ucode_prefix[30]; + char ucode_prefix[25]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -475,9 +723,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ @@ -493,8 +741,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; if (adev->gfx.rs64_enable) { @@ -506,8 +755,19 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } if (!amdgpu_sriov_vf(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && + adev->pdev->revision == 0xCE) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; rlc_hdr = (const struct 
rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; @@ -518,8 +778,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) goto out; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; if (adev->gfx.rs64_enable) { @@ -539,6 +800,15 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) /* only one MEC for gfx 11.0.0. */ adev->gfx.mec2_fw = NULL; + gfx_v11_0_check_fw_cp_gfx_shadow(adev); + + if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + DRM_ERROR("Failed to init imu firmware!\n"); + return err; + } + out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -580,12 +850,9 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -593,39 +860,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) @@ -645,7 +888,7 @@ static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; - reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 
@@ -674,7 +917,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -699,7 +942,7 @@ static int gfx_v11_0_mec_init(struct amdgpu_device *adev) u32 *hpd; size_t mec_hpd_size; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -747,7 +990,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); } -static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx11 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -773,7 +1016,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -784,7 +1027,7 @@ static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -795,11 +1038,39 @@ static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc21_grbm_select(adev, me, pipe, q, vm); } +/* all sizes are in bytes */ +#define MQD_SHADOW_BASE_SIZE 73728 +#define MQD_SHADOW_BASE_ALIGNMENT 256 +#define MQD_FWWORKAREA_SIZE 484 +#define MQD_FWWORKAREA_ALIGNMENT 256 + +static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info) +{ + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); + return 0; + } else { + memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); + return -ENOTSUPP; + } +} + static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, .select_se_sh = &gfx_v11_0_select_se_sh, @@ -808,12 +1079,12 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, + .get_gfx_shadow_info = 
&gfx_v11_0_get_gfx_shadow_info, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) { - - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): adev->gfx.config.max_hw_contexts = 8; @@ -832,6 +1103,10 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -849,9 +1124,9 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, int me, int pipe, int queue) { - int r; struct amdgpu_ring *ring; unsigned int irq_type; + unsigned int hw_prio; ring = &adev->gfx.gfx_ring[ring_id]; @@ -861,20 +1136,23 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; + if (adev->gfx.disable_kq) { + ring->no_scheduler = true; + ring->no_user_submission = true; + } if (!ring_id) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; - r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - return 0; + hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); } static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, @@ -897,7 +1175,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX11_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -1251,30 +1529,69 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) return 0; } -static int gfx_v11_0_sw_init(void *handle) +static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); + uint32_t *ptr; + uint32_t inst; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_11); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } + + /* Allocate memory for gfx queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); + inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); + adev->gfx.ip_dump_gfx_queues = NULL; + } else { + adev->gfx.ip_dump_gfx_queues = ptr; + } +} + +static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j, k, r, ring_id; + int xcc_id = 0; + struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ - adev->gfxhub.funcs->init(adev); + INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 4; - break; - case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1289,9 +1606,118 @@ static int gfx_v11_0_sw_init(void *handle) break; } + switch (amdgpu_ip_version(adev, 
GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2420 && + adev->gfx.pfp_fw_version >= 2580 && + adev->gfx.mec_fw_version >= 2650 && + adev->mes.fw_version[0] >= 120) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + /* add firmware version checks here */ + if (0 && !adev->gfx.disable_uq) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 2280 && + adev->gfx.pfp_fw_version >= 2370 && + adev->gfx.mec_fw_version >= 2450 && + adev->mes.fw_version[0] >= 99) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.pfp_fw_version >= 102 && + adev->gfx.mec_fw_version >= 66 && + adev->mes.fw_version[0] >= 128) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 26 && + adev->mes.fw_version[0] >= 114) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 2): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 12 && + adev->gfx.pfp_fw_version >= 15 && + adev->gfx.mec_fw_version >= 15) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 7 && + adev->gfx.pfp_fw_version >= 8 && + adev->gfx.mec_fw_version >= 8) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + 
adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) && - amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && + amdgpu_sriov_is_pp_one_vf(adev)) adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; /* EOP Event */ @@ -1301,6 +1727,13 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -1315,13 +1748,6 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; - /* ECC error */ - r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_ECC_ERROR, - &adev->gfx.cp_ecc_error_irq); - if (r) - return r; - /* FED error */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, @@ -1331,14 +1757,6 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; - if (adev->gfx.imu.funcs) { - if (adev->gfx.imu.funcs->init_microcode) { - r = adev->gfx.imu.funcs->init_microcode(adev); - if (r) - DRM_ERROR("Failed to load imu firmware!\n"); - } - } - gfx_v11_0_me_init(adev); r = gfx_v11_0_rlc_init(adev); @@ -1353,55 +1771,84 @@ static int gfx_v11_0_sw_init(void *handle) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v11_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + ring_id = 0; + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v11_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, - j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; - r = gfx_v11_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v11_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + 
case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + if ((adev->gfx.me_fw_version >= 2280) && + (adev->gfx.mec_fw_version >= 2410) && + !amdgpu_sriov_vf(adev) && + !adev->debug_disable_gpu_ring_reset) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } + break; + default: + if (!amdgpu_sriov_vf(adev) && + !adev->debug_disable_gpu_ring_reset) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } + break; + } + if (!adev->enable_mes_kiq) { - r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE); + r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); if (r) { DRM_ERROR("Failed to init KIQ BOs!\n"); return r; } - kiq = &adev->gfx.kiq; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); if (r) return r; @@ -1421,6 +1868,12 @@ static int gfx_v11_0_sw_init(void *handle) return -EINVAL; } + gfx_v11_0_alloc_ip_dump(adev); + + r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + return 0; } @@ -1453,23 +1906,25 @@ static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) (void **)&adev->gfx.rlc.rlc_autoload_ptr); } -static int gfx_v11_0_sw_fini(void *handle) +static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev, 0); if (!adev->enable_mes_kiq) { - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); - amdgpu_gfx_kiq_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); } + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v11_0_pfp_fini(adev); gfx_v11_0_me_fini(adev); gfx_v11_0_rlc_fini(adev); @@ -1480,11 +1935,17 @@ static int gfx_v11_0_sw_fini(void *handle) gfx_v11_0_free_microcode(adev); + amdgpu_gfx_sysfs_fini(adev); + + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + kfree(adev->gfx.ip_dump_gfx_queues); + return 0; } static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -1549,6 +2010,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) { + u32 rb_bitmap_per_sa; u32 rb_bitmap_width_per_sa; u32 max_sa; u32 active_sa_bitmap; @@ -1566,12 +2028,14 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / adev->gfx.config.max_sh_per_se; + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); + for (i = 0; i < max_sa; i++) { if (active_sa_bitmap & (1 << i)) - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; 
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -1605,12 +2069,15 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) /* Enable trap for each kfd vmid. */ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + /* + * Initialize all compute VMIDs to have no GDS, GWS, or OA + * access. These should be enabled by FW for target VMIDs. + */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); @@ -1674,7 +2141,7 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc21_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -1694,26 +2161,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) gfx_v11_0_init_gds_vmid(adev); } +static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); - - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 
1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v11_0_init_csb(struct amdgpu_device *adev) @@ -1925,7 +2440,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) if (version_minor == 3) gfx_v11_0_load_rlcp_rlcv_microcode(adev); } - + return 0; } @@ -1995,7 +2510,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2039,7 +2554,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2084,7 +2599,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -2524,8 +3039,14 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) for (i = 0; i < adev->usec_timeout; i++) { cp_status = RREG32_SOC15(GC, 0, regCP_STAT); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(11, 0, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(11, 0, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -2714,7 +3235,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2932,7 +3453,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -3195,7 +3716,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); @@ -3219,7 +3739,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); - /* set the wb address wether it's enabled or not */ + /* set the wb address whether it's enabled or not */ rptr_addr = ring->rptr_gpu_addr; WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, 
upper_32_bits(rptr_addr) & @@ -3257,7 +3777,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) ring->wptr = 0; WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); - /* Set the wb address wether it's enabled or not */ + /* Set the wb address whether it's enabled or not */ rptr_addr = ring->rptr_gpu_addr; WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & @@ -3287,11 +3807,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v11_0_cp_gfx_start(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -3337,8 +3852,6 @@ static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); } - adev->gfx.kiq.ring.sched.ready = enable; - udelay(50); } @@ -3375,7 +3888,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) } memcpy(fw, fw_data, fw_size); - + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); @@ -3534,9 +4047,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -3554,6 +4065,24 @@ static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) (adev->doorbell_index.userqueue_end * 2) << 2); } +static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, + struct v11_gfx_mqd *mqd, + struct amdgpu_mqd_prop *prop) +{ + bool priority = 0; + u32 tmp; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority + */ + if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) + priority = 1; + + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); + mqd->cp_gfx_hqd_queue_priority = tmp; +} + static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, struct amdgpu_mqd_prop *prop) { @@ -3571,25 +4100,22 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; - /* set up default queue priority level - * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); - tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); - mqd->cp_gfx_hqd_queue_priority = tmp; + /* set up gfx queue priority */ + gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ - tmp = 
RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -3611,16 +4137,20 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -3632,164 +4162,67 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; - return 0; -} - -#ifdef BRING_UP_DEBUG -static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - struct v11_gfx_mqd *mqd = ring->mqd_ptr; - - /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); - - /* set GFX_MQD_BASE */ - WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); - WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set GFX_MQD_CONTROL */ - WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); - - /* set GFX_HQD_VMID to 0 */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); - - WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY, - mqd->cp_gfx_hqd_queue_priority); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); - - /* set GFX_HQD_BASE, similar as CP_RB_BASE */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); - - /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); - - /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); - - /* set RB_WPTR_POLL_ADDR */ - WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); - - /* set RB_DOORBELL_CONTROL */ - WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); - - /* active the queue */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = 
upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); return 0; } -#endif -static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v11_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); amdgpu_ring_init_mqd(ring); -#ifdef BRING_UP_DEBUG - gfx_v11_0_gfx_queue_init_register(ring); -#endif soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { - /* reset mqd with the backup copy */ + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { + /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; amdgpu_ring_clear_ring(ring); -#ifdef BRING_UP_DEBUG - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v11_0_gfx_queue_init_register(ring); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -#endif - } else { - amdgpu_ring_clear_ring(ring); } return 0; } -#ifndef BRING_UP_DEBUG -static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - int r, i; - - if (!kiq->pmf || !kiq->pmf->kiq_map_queues) - return -EINVAL; - - r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * - adev->gfx.num_gfx_rings); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); - - return amdgpu_ring_test_helper(kiq_ring); -} -#endif - static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; - struct amdgpu_ring *ring; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_gfx_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) - goto done; + return r; } -#ifndef BRING_UP_DEBUG - r = gfx_v11_0_kiq_enable_kgq(adev); - if (r) - goto done; -#endif - r = gfx_v11_0_cp_gfx_start(adev); + + r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; + return r; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } -done: - return r; + return gfx_v11_0_cp_gfx_start(adev); } static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, @@ -3812,14 +4245,14 @@ static int 
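gfx_v11_0_kgq_init_queue() now folds the old three-way branch into init-or-restore and switches the MQD snapshot copies to memcpy_fromio()/memcpy_toio(), since the MQD pointer is a mapping of a buffer object rather than ordinary kernel memory. A minimal standalone sketch of the init-vs-restore split; plain memcpy() stands in for the _fromio/_toio variants and the MQD contents are fake:

/* Minimal sketch (not kernel code): first bring-up programs the MQD and
 * snapshots it; every later call (reset, resume) restores the snapshot
 * and rewinds the ring. */
#include <stdbool.h>
#include <string.h>
#include <stdio.h>

struct mqd { unsigned int words[8]; };

static struct mqd live, backup;

static void init_queue(bool reset, bool in_reset, bool in_suspend)
{
	if (!reset && !in_reset && !in_suspend) {
		memset(&live, 0, sizeof(live));
		live.words[0] = 0x1234;			/* "program" the MQD */
		memcpy(&backup, &live, sizeof(live));	/* save snapshot */
	} else {
		memcpy(&live, &backup, sizeof(live));	/* restore snapshot */
		/* ring wptr/rptr would be cleared here */
	}
}

int main(void)
{
	init_queue(false, false, false);	/* first init */
	live.words[0] = 0;			/* pretend a reset trashed it */
	init_queue(true, false, false);		/* queue-reset path */
	printf("restored word0=0x%x\n", live.words[0]);
	return 0;
}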
gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -3848,7 +4281,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -3858,15 +4291,20 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -3883,7 +4321,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -3898,17 +4336,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; @@ -3918,6 +4356,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = 
lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4035,14 +4477,13 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v11_compute_mqd *mqd = ring->mqd_ptr; - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; gfx_v11_0_kiq_setting(ring); if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -4064,20 +4505,20 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; } -static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v11_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -4086,18 +4527,15 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; @@ -4105,57 +4543,24 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq.ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v11_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; + gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; if (!amdgpu_async_gfx_ring) gfx_v11_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kcq_init_queue(ring); - 
amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) @@ -4208,11 +4613,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) return r; } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->gfx.disable_kq) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we don't want to set ring->ready */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + } + if (amdgpu_async_gfx_ring) + amdgpu_gfx_disable_kgq(adev, 0); + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { @@ -4240,13 +4657,14 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? - false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->gfxhub.funcs->set_fault_enable_default(adev, value); - amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + /* TODO investigate why this and the hdp flush above is needed, + * are we missing a flush somewhere else? */ + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); return 0; } @@ -4319,21 +4737,24 @@ static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } -static int gfx_v11_0_hw_init(void *handle) +static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { if (adev->gfx.imu.funcs) { /* RLC autoload sequence 1: Program rlc ram */ if (adev->gfx.imu.funcs->program_rlc_ram) adev->gfx.imu.funcs->program_rlc_ram(adev); + /* rlc autoload firmware */ + r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); + if (r) + return r; } - /* rlc autoload firmware */ - r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); - if (r) - return r; } else { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { @@ -4382,11 +4803,9 @@ static int gfx_v11_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. 
*/ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_pm_load_smu_firmware(adev, NULL); - if (r) - return r; - } + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; } gfx_v11_0_constants_init(adev); @@ -4411,52 +4830,75 @@ static int gfx_v11_0_hw_init(void *handle) if (r) return r; + /* get IMU version from HW if it's not set */ + if (!adev->gfx.imu_fw_version) + adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); + return r; } -#ifndef BRING_UP_DEBUG -static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev) +static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } - if (adev->gfx.kiq.ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } - return r; + return 0; } -#endif -static int gfx_v11_0_hw_fini(void *handle) +static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; + struct amdgpu_device *adev = ip_block->adev; + + cancel_delayed_work_sync(&adev->gfx.idle_work); - amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v11_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { -#ifndef BRING_UP_DEBUG - if (amdgpu_async_gfx_ring) { - r = gfx_v11_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_async_gfx_ring && + !adev->gfx.disable_kq) { + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } -#endif - if (amdgpu_gfx_disable_kcq(adev)) + + if (amdgpu_gfx_disable_kcq(adev, 0)) DRM_ERROR("KCQ disable failed\n"); amdgpu_mes_kiq_hw_fini(adev); @@ -4480,19 +4922,19 @@ static int gfx_v11_0_hw_fini(void *handle) return 0; } -static int gfx_v11_0_suspend(void *handle) +static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v11_0_hw_fini(handle); + return gfx_v11_0_hw_fini(ip_block); } -static int gfx_v11_0_resume(void *handle) +static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v11_0_hw_init(handle); + return gfx_v11_0_hw_init(ip_block); } -static bool gfx_v11_0_is_idle(void *handle) +static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct 
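gfx_v11_0_set_userq_eop_interrupts() enables one EOP interrupt source per pipe by flattening (ME/MEC, pipe) into a linear irq_type: AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p for gfx and AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + m * num_pipe_per_mec + p for compute. A standalone illustration of that flattening; the enum base values below are placeholders, not the real amdgpu values:

/* Minimal sketch (not kernel code): mapping (mec, pipe) pairs onto the
 * linear irq_type range used for user-queue EOP interrupts. */
#include <stdio.h>

#define EX_GFX_ME0_PIPE0_EOP      0	/* placeholder base */
#define EX_COMPUTE_MEC1_PIPE0_EOP 10	/* placeholder base */

int main(void)
{
	int num_pipe_per_me = 1, num_mec = 2, num_pipe_per_mec = 4;
	int m, p;

	for (p = 0; p < num_pipe_per_me; p++)
		printf("gfx me0 pipe%d -> irq_type %d\n",
		       p, EX_GFX_ME0_PIPE0_EOP + p);

	for (m = 0; m < num_mec; m++)
		for (p = 0; p < num_pipe_per_mec; p++)
			printf("mec%d pipe%d -> irq_type %d\n", m + 1, p,
			       EX_COMPUTE_MEC1_PIPE0_EOP +
			       m * num_pipe_per_mec + p);
	return 0;
}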
amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) @@ -4501,11 +4943,11 @@ static bool gfx_v11_0_is_idle(void *handle) return true; } -static int gfx_v11_0_wait_for_idle(void *handle) +static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -4519,12 +4961,46 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v11_0_soft_reset(void *handle) +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if (req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + +static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 grbm_soft_reset = 0; u32 tmp; - int i, j, k; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r, i, j, k; + struct amdgpu_device *adev = ip_block->adev; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); @@ -4533,16 +5009,11 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev); - + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); @@ -4552,16 +5023,23 @@ static int gfx_v11_0_soft_reset(void *handle) for (i = 0; i < adev->gfx.me.num_me; ++i) { for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); } } } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ + mutex_lock(&adev->gfx.reset_sem_mutex); + r = gfx_v11_0_request_gfx_index_mutex(adev, true); + if (r) { + mutex_unlock(&adev->gfx.reset_sem_mutex); + DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); + return 
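gfx_v11_0_request_gfx_index_mutex() implements a hardware mutex handshake: the driver writes a REQUEST+CLIENTID word to regCP_GFX_INDEX_MUTEX and polls until the register echoes the same word back (grant), or, when releasing, until the echo no longer shows the lock held. A minimal standalone sketch of the acquire path with a fake register standing in for the hardware; the REQUEST/CLIENTID bit layout is illustrative:

/* Minimal sketch (not kernel code) of a request-and-poll hardware mutex. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t req = (1u << 0) | (4u << 4);	/* REQUEST=1, CLIENTID=4; illustrative layout */
	uint32_t readback = 0;
	int i, grant_after = 3, timeout = 1000;

	for (i = 0; i < timeout; i++) {
		/* driver: WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, req) */
		if (i >= grant_after)
			readback = req;	/* fake hw echoes the request once granted */
		/* driver: readback = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX) */
		if (readback == req)
			break;		/* mutex acquired */
	}

	if (i < timeout)
		printf("acquired after %d polls\n", i + 1);
	else
		printf("timed out\n");
	return 0;
}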
r; + } WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); @@ -4571,6 +5049,14 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); RREG32_SOC15(GC, 0, regCP_VMID_RESET); + /* release the gfx mutex */ + r = gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + if (r) { + DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); + return r; + } + for (i = 0; i < adev->usec_timeout; i++) { if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) @@ -4633,15 +5119,15 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_unset_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return gfx_v11_0_cp_resume(adev); } -static bool gfx_v11_0_check_soft_reset(void *handle) +static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; long tmo = msecs_to_jiffies(1000); @@ -4662,12 +5148,13 @@ static bool gfx_v11_0_check_soft_reset(void *handle) return false; } -static int gfx_v11_0_post_soft_reset(void *handle) +static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; /** * GFX soft reset will impact MES, need resume MES when do GFX soft reset */ - return amdgpu_mes_resume((struct amdgpu_device *)handle); + return amdgpu_mes_resume(adev); } static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) @@ -4675,24 +5162,27 @@ static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; - amdgpu_gfx_off_ctrl(adev, false); - mutex_lock(&adev->gfx.gpu_clock_mutex); if (amdgpu_sriov_vf(adev)) { + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->gfx.gpu_clock_mutex); clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); if (clock_counter_hi_pre != clock_counter_hi_after) clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); } else { + preempt_disable(); clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); if (clock_counter_hi_pre != clock_counter_hi_after) clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); } clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); - mutex_unlock(&adev->gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); + return clock; } @@ -4725,15 +5215,40 @@ static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } -static int gfx_v11_0_early_init(void *handle) +static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq 
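The reworked gfx_v11_0_get_gpu_clock_counter() uses the classic rollover-safe read of a split 64-bit counter: sample HI, then LO, then HI again, and re-read LO if HI moved, so the result never pairs a pre-rollover LO with a post-rollover HI. A standalone demonstration with a fake counter that ticks on every register access:

/* Minimal sketch (not kernel code) of a wraparound-safe HI/LO/HI read. */
#include <stdint.h>
#include <stdio.h>

static uint64_t fake_counter = 0x00000001fffffffdULL;

/* every access ticks the fake counter, so a rollover can land between reads */
static uint32_t read_lo(void) { return (uint32_t)++fake_counter; }
static uint32_t read_hi(void) { return (uint32_t)(++fake_counter >> 32); }

int main(void)
{
	uint64_t hi_pre, lo, hi_after, naive;

	hi_pre = read_hi();		/* 1 */
	lo = read_lo();			/* 0xffffffff, just before rollover */
	hi_after = read_hi();		/* 2, rollover happened */
	naive = lo | (hi_after << 32);	/* mixes old LO with new HI */

	if (hi_pre != hi_after)
		lo = read_lo();		/* LO wrapped, take a fresh sample */

	printf("naive 0x%016llx vs corrected 0x%016llx\n",
	       (unsigned long long)naive,
	       (unsigned long long)(lo | (hi_after << 32)));
	return 0;
}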
= true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } adev->gfx.funcs = &gfx_v11_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + /* We need one GFX ring temporarily to set up + * the clear state. + */ + adev->gfx.num_gfx_rings = 1; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v11_0_set_kiq_pm4_funcs(adev); gfx_v11_0_set_ring_funcs(adev); @@ -4748,29 +5263,9 @@ static int gfx_v11_0_early_init(void *handle) return gfx_v11_0_init_microcode(adev); } -static int gfx_v11_0_ras_late_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if *gfx_common_if; - int ret; - - gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!gfx_common_if) - return -ENOMEM; - - gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; - - ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); - if (ret) - dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); - - kfree(gfx_common_if); - return 0; -} - -static int gfx_v11_0_late_init(void *handle) +static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); @@ -4781,11 +5276,13 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { - r = gfx_v11_0_ras_late_init(handle); - if (r) - return r; - } + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + + r = gfx_v11_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; return 0; } @@ -4799,7 +5296,7 @@ static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; } -static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -4818,7 +5315,7 @@ static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); } @@ -5046,7 +5543,7 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); @@ -5066,32 +5563,40 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, AMD_CG_SUPPORT_GFX_3D_CGLS)) gfx_v11_0_enable_gui_idle_interrupt(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } -static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { - u32 reg, data; + u32 reg, pre_data, data; amdgpu_gfx_off_ctrl(adev, false); - reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) + pre_data = RREG32_NO_KIQ(reg); else - data = RREG32(reg); + pre_data = RREG32(reg); - data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); - + if (pre_data != data) { + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + } else + WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + } amdgpu_gfx_off_ctrl(adev, true); + + if (ring + && amdgpu_sriov_is_pp_one_vf(adev) + && (pre_data != data) + && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) + || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { + amdgpu_ring_emit_wreg(ring, reg, data); + } } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { @@ -5121,9 +5626,13 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) // Program RLC_PG_DELAY3 for CGPG hysteresis if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5134,23 +5643,23 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v11_cntl_power_gating(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } -static int gfx_v11_0_set_powergating_state(void *handle, +static int 
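gfx_v11_0_update_spm_vmid() now keeps the original register value in pre_data and skips the write (and the optional ring WREG32 for the SR-IOV one-VF case) when the VMID field would not change. A minimal standalone sketch of that change-detecting read-modify-write; the mask/shift are placeholders for RLC_SPM_MC_CNTL__RLC_SPM_VMID_*:

/* Minimal sketch (not kernel code): only touch the register when the
 * field value actually changes. */
#include <stdint.h>
#include <stdio.h>

#define EX_SPM_VMID_MASK  0x0000000fu	/* placeholder */
#define EX_SPM_VMID_SHIFT 0

static uint32_t fake_reg = 0x5;		/* pretend VMID 5 is already set */
static int writes;

static void reg_write(uint32_t v) { fake_reg = v; writes++; }

static void update_spm_vmid(unsigned int vmid)
{
	uint32_t pre_data = fake_reg;
	uint32_t data = (pre_data & ~EX_SPM_VMID_MASK) |
			((vmid << EX_SPM_VMID_SHIFT) & EX_SPM_VMID_MASK);

	if (pre_data != data)
		reg_write(data);	/* skip the no-op write */
}

int main(void)
{
	update_spm_vmid(5);	/* already programmed, nothing written */
	update_spm_vmid(7);	/* real update */
	printf("register writes issued: %d\n", writes);	/* 1 */
	return 0;
}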
gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): @@ -5158,8 +5667,18 @@ static int gfx_v11_0_set_powergating_state(void *handle, break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + if (!enable) + amdgpu_gfx_off_ctrl(adev, false); + gfx_v11_cntl_pg(adev, enable); - amdgpu_gfx_off_ctrl(adev, enable); + + if (enable) + amdgpu_gfx_off_ctrl(adev, true); + break; default: break; @@ -5168,20 +5687,24 @@ static int gfx_v11_0_set_powergating_state(void *handle, return 0; } -static int gfx_v11_0_set_clockgating_state(void *handle, +static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5192,9 +5715,9 @@ static int gfx_v11_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) +static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; /* AMD_CG_SUPPORT_GFX_MGCG */ @@ -5258,45 +5781,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - 
ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -5321,42 +5816,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ - } + BUG(); /* only DOORBELL method supported on gfx11 now */ } } @@ -5379,7 +5846,7 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } reg_mem_engine = 0; } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; reg_mem_engine = 1; /* pfp */ } @@ -5397,27 +5864,21 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); - if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) control |= INDIRECT_BUFFER_PRE_RESUME(1); - if (vmid) + if (vmid && !ring->adev->gfx.rs64_enable) gfx_v11_0_ring_emit_de_meta(ring, - (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? 
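gfx_v11_0_ring_emit_ib_gfx() builds the INDIRECT_BUFFER control dword by packing the IB length in dwords into the low bits and the VMID at bit 24, then OR-ing in the preemption enable/resume flags when MCBP is in use. A standalone sketch of that packing; the PRE_ENB/PRE_RESUME bit positions are placeholders, not the real packet definitions:

/* Minimal sketch (not kernel code) of the IB control dword layout. */
#include <stdint.h>
#include <stdio.h>

#define EX_IB_PRE_ENB(x)    ((uint32_t)(x) << 21)	/* placeholder for INDIRECT_BUFFER_PRE_ENB */
#define EX_IB_PRE_RESUME(x) ((uint32_t)(x) << 30)	/* placeholder for INDIRECT_BUFFER_PRE_RESUME */

int main(void)
{
	uint32_t length_dw = 0x100;	/* 256-dword IB */
	uint32_t vmid = 3;
	int preemptible = 1, resumed = 0;
	uint32_t control = length_dw | (vmid << 24);

	if (preemptible) {
		control |= EX_IB_PRE_ENB(1);
		if (resumed)
			control |= EX_IB_PRE_RESUME(1);
	}
	printf("IB control = 0x%08x\n", (unsigned int)control);
	return 0;
}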
true : false); + !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED)); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5437,10 +5898,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -5478,11 +5935,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | PACKET3_RELEASE_MEM_GCR_GL2_WB | - PACKET3_RELEASE_MEM_GCR_GL2_INV | - PACKET3_RELEASE_MEM_GCR_GL2_US | - PACKET3_RELEASE_MEM_GCR_GL1_INV | - PACKET3_RELEASE_MEM_GCR_GLV_INV | - PACKET3_RELEASE_MEM_GCR_GLM_INV | + PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ PACKET3_RELEASE_MEM_GCR_GLM_WB | PACKET3_RELEASE_MEM_CACHE_POLICY(3) | PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | @@ -5502,8 +5955,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5531,10 +5983,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -5542,6 +5991,12 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); } + + /* Make sure that we can't skip the SET_Q_MODE packets when the VM + * changed in any way. 
+ */ + ring->set_q_mode_offs = 0; + ring->set_q_mode_ptr = NULL; } static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, @@ -5591,41 +6046,125 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } -static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, + u64 shadow_va, u64 csa_va, + u64 gds_va, bool init_shadow, + int vmid) { - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); + struct amdgpu_device *adev = ring->adev; + unsigned int offs, end; - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; + if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) + return; + + /* + * The logic here isn't easy to understand because we need to keep state + * accross multiple executions of the function as well as between the + * CPU and GPU. The general idea is that the newly written GPU command + * has a condition on the previous one and only executed if really + * necessary. + */ + + /* + * The dw in the NOP controls if the next SET_Q_MODE packet should be + * executed or not. Reserve 64bits just to be on the save side. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); + offs = ring->wptr & ring->buf_mask; + + /* + * We start with skipping the prefix SET_Q_MODE and always executing + * the postfix SET_Q_MODE packet. This is changed below with a + * WRITE_DATA command when the postfix executed. + */ + amdgpu_ring_write(ring, shadow_va ? 1 : 0); + amdgpu_ring_write(ring, 0); + + if (ring->set_q_mode_offs) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += ring->set_q_mode_offs << 2; + end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); + } + + /* + * When the postfix SET_Q_MODE packet executes we need to make sure that the + * next prefix SET_Q_MODE packet executes as well. 
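The COND_EXEC placeholder written by gfx_v11_0_ring_emit_init_cond_exec() is patched later with the number of dwords to skip; the removed gfx_v11_0_ring_emit_patch_cond_exec() shows the arithmetic, which now lives in the generic amdgpu_ring_patch_cond_exec() helper: the distance from the placeholder to the current write pointer, with a ring wraparound folded in. A standalone check of that computation:

/* Minimal sketch (not kernel code) of the COND_EXEC patch value. */
#include <stdint.h>
#include <stdio.h>

static uint32_t patch_value(uint32_t offset, uint32_t cur, uint32_t buf_mask)
{
	/* dwords between the placeholder and the current write pointer */
	if (cur > offset)
		return cur - offset;
	return (buf_mask + 1) - offset + cur;	/* wptr wrapped past the end */
}

int main(void)
{
	uint32_t buf_mask = 0x3ff;	/* 1024-dword ring */

	printf("no wrap: %u dwords\n", patch_value(100, 164, buf_mask));
	printf("wrapped: %u dwords\n", patch_value(1020, 60, buf_mask));
	return 0;
}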
+ */ + if (!shadow_va) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += offs << 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 0x1); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); + amdgpu_ring_write(ring, lower_32_bits(shadow_va)); + amdgpu_ring_write(ring, upper_32_bits(shadow_va)); + amdgpu_ring_write(ring, lower_32_bits(gds_va)); + amdgpu_ring_write(ring, upper_32_bits(gds_va)); + amdgpu_ring_write(ring, lower_32_bits(csa_va)); + amdgpu_ring_write(ring, upper_32_bits(csa_va)); + amdgpu_ring_write(ring, shadow_va ? + PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); + amdgpu_ring_write(ring, init_shadow ? + PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); + + if (ring->set_q_mode_offs) + amdgpu_ring_patch_cond_exec(ring, end); + + if (shadow_va) { + uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; + + /* + * If the tokens match try to skip the last postfix SET_Q_MODE + * packet to avoid saving/restoring the state all the time. + */ + if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) + *ring->set_q_mode_ptr = 0; + + ring->set_q_mode_token = token; + } else { + ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; + } + + ring->set_q_mode_offs = offs; } static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; + if (adev->enable_mes) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -5673,28 +6212,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -5781,19 +6305,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = 
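The tail of gfx_v11_0_ring_emit_gfx_shadow() folds the shadow, CSA and GDS addresses plus the VMID into a single XOR token; when the next submission produces the same token, the dword gating the previously queued SET_Q_MODE packet is cleared so the CP skips the redundant state save/restore. A minimal standalone sketch of the token compare; the real code tracks the gate through ring->set_q_mode_ptr, here it is collapsed into one variable:

/* Minimal sketch (not kernel code) of the token-based SET_Q_MODE skip. */
#include <stdint.h>
#include <stdio.h>

static uint64_t last_token = ~0ULL;
static uint32_t gate_dw = 1;		/* stands in for *ring->set_q_mode_ptr */

static void emit_gfx_shadow(uint64_t shadow_va, uint64_t csa_va,
			    uint64_t gds_va, unsigned int vmid)
{
	uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;

	if (token == last_token)
		gate_dw = 0;	/* same state: skip the queued SET_Q_MODE */
	else
		gate_dw = 1;	/* state changed: let it execute */
	last_token = token;
}

int main(void)
{
	emit_gfx_shadow(0x1000, 0x2000, 0x3000, 5);
	emit_gfx_shadow(0x1000, 0x2000, 0x3000, 5);	/* identical -> skipped */
	printf("gate dword after identical resubmit: %u\n", gate_dw);
	return 0;
}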
REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - WREG32_SOC15(GC, 0, regSQ_CMD, value); -} - static void gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5897,36 +6408,6 @@ static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev } } -#define CP_ME1_PIPE_INST_ADDR_INTERVAL 0x1 -#define SET_ECC_ME_PIPE_STATE(reg_addr, state) \ - do { \ - uint32_t tmp = RREG32_SOC15_IP(GC, reg_addr); \ - tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, state); \ - WREG32_SOC15_IP(GC, reg_addr, tmp); \ - } while (0) - -static int gfx_v11_0_set_cp_ecc_error_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - uint32_t ecc_irq_state = 0; - uint32_t pipe0_int_cntl_addr = 0; - int i = 0; - - ecc_irq_state = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0; - - pipe0_int_cntl_addr = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); - - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, ecc_irq_state); - - for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) - SET_ECC_ME_PIPE_STATE(pipe0_int_cntl_addr + i * CP_ME1_PIPE_INST_ADDR_INTERVAL, - ecc_irq_state); - - return 0; -} - static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -5961,25 +6442,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -6014,15 +6493,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
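The EOP and fault handlers decode the source queue from the IV entry's ring_id: pipe in bits [1:0], ME in bits [3:2] and queue in bits [6:4], matching the masks used above. A standalone decode of an example ring_id:

/* Minimal sketch (not kernel code) of the ring_id bit decode. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ring_id  = 0x26;	/* example IV ring_id */
	uint32_t pipe_id  = (ring_id & 0x03) >> 0;
	uint32_t me_id    = (ring_id & 0x0c) >> 2;
	uint32_t queue_id = (ring_id & 0x70) >> 4;

	printf("me %u pipe %u queue %u\n", me_id, pipe_id, queue_id);
	return 0;
}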
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6031,17 +6537,75 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6061,27 +6625,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } @@ -6094,6 +6660,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v11_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6120,7 +6695,7 @@ static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { uint32_t tmp, target; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); target += ring->pipe; @@ -6180,6 +6755,426 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + + +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; 
+ } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, + * so the pipe reset status relies on the later gfx ring test result. + */ + return 0; +} + +static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + if (r) { + + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v11_reset_gfx_pipe(ring); + if (r) + return r; + } + + r = gfx_v11_0_kgq_init_queue(ring, true); + if (r) { + dev_err(adev->dev, "failed to init kgq\n"); + return r; + } + + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + 
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe + * reset status relies on the compute ring test result. + */ + return 0; +} + +static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r = 0; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); + if (r) { + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v11_0_reset_compute_pipe(ring); + if (r) + return r; + } + + r = gfx_v11_0_kcq_init_queue(ring, true); + if (r) { + dev_err(adev->dev, "fail to init kcq\n"); + return r; + } + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kcq\n"); + return r; + } + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); + + if (!adev->gfx.ip_dump_core) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_11_0[i].reg_name, + adev->gfx.ip_dump_core[i]); + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_11); + drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + 
gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); + } + index += reg_count; + } + } + } + + /* print gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); + drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", + adev->gfx.me.num_me, + adev->gfx.me.num_pipe_per_me, + adev->gfx.me.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_gfx_queue_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_gfx_queues[index + reg]); + } + index += reg_count; + } + } + } +} + +static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); + + if (!adev->gfx.ip_dump_core) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_11); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); + for (reg = 0; reg < reg_count; reg++) { + if (i && + gc_cp_reg_list_11[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); + } + index += reg_count; + } + } + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + soc21_grbm_select(adev, i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_gfx_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_gfx_queue_reg_list_11[reg])); + } + index += reg_count; + } + } + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + +static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + +static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_begin_use(ring); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); +} + +static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) +{ + 
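/* Mirrors gfx_v11_0_ring_begin_use(): releases the profile and
 * enforce-isolation bookkeeping taken when the ring went busy.
 */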
amdgpu_gfx_profile_ring_end_use(ring); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .name = "gfx_v11_0", .early_init = gfx_v11_0_early_init, @@ -6198,6 +7193,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, + .dump_ip_state = gfx_v11_ip_dump, + .print_ip_state = gfx_v11_ip_print, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { @@ -6209,12 +7206,14 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, .set_wptr = gfx_v11_0_ring_set_wptr_gfx, - .emit_frame_size = /* totally 242 maximum if 16 IBs */ + .emit_frame_size = /* totally 247 maximum if 16 IBs */ + 5 + /* update_spm_vmid */ 5 + /* COND_EXEC */ + 22 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6223,8 +7222,10 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ + 22 + /* SET_Q_PREEMPTION_MODE */ 8 + 8 + /* FENCE x2 */ - 8, /* gfx_v11_0_emit_mem_sync */ + 8 + /* gfx_v11_0_emit_mem_sync */ + 2, /* gfx_v11_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ .emit_ib = gfx_v11_0_ring_emit_ib_gfx, .emit_fence = gfx_v11_0_ring_emit_fence, @@ -6234,18 +7235,21 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, + .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v11_0_ring_preempt_ib, .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, + .reset = gfx_v11_0_reset_kgq, + .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, + .begin_use = gfx_v11_0_ring_begin_use, + .end_use = gfx_v11_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { @@ -6257,6 +7261,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, .emit_frame_size = + 5 + /* update_spm_vmid */ 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ @@ -6265,7 +7270,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ - 8, /* gfx_v11_0_emit_mem_sync */ + 8 + /* gfx_v11_0_emit_mem_sync */ + 2, /* gfx_v11_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* 
gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, .emit_fence = gfx_v11_0_ring_emit_fence, @@ -6275,12 +7281,16 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, .emit_mem_sync = gfx_v11_0_emit_mem_sync, + .reset = gfx_v11_0_reset_kcq, + .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, + .begin_use = gfx_v11_0_ring_begin_use, + .end_use = gfx_v11_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { @@ -6298,7 +7308,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, @@ -6311,13 +7320,14 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, + .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, }; static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq; + adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq; for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; @@ -6336,16 +7346,16 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { .process = gfx_v11_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { + .set = gfx_v11_0_set_bad_op_fault_state, + .process = gfx_v11_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { .set = gfx_v11_0_set_priv_inst_fault_state, .process = gfx_v11_0_priv_inst_irq, }; -static const struct amdgpu_irq_src_funcs gfx_v11_0_cp_ecc_error_irq_funcs = { - .set = gfx_v11_0_set_cp_ecc_error_state, - .process = amdgpu_gfx_cp_ecc_error_irq, -}; - static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { .process = gfx_v11_0_rlc_gc_fed_irq, }; @@ -6358,12 +7368,12 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; - adev->gfx.cp_ecc_error_irq.num_types = 1; /* CP ECC error */ - adev->gfx.cp_ecc_error_irq.funcs = &gfx_v11_0_cp_ecc_error_irq_funcs; - adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; @@ -6472,9 +7482,12 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * 
adev->gfx.config.max_sh_per_se + j;
+			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+				continue;
 			mask = 1;
 			counter = 0;
-			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
+			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
 			if (i < 8 && j < 2)
 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
 					adev, disable_masks[i * 2 + j]);
@@ -6495,7 +7508,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
 			 */
-			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
+			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
 				if (bitmap & mask)
@@ -6506,7 +7519,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
 			active_cu_number += counter;
 		}
 	}
-	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	cu_info->number = active_cu_number;
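For readers skimming the per-queue reset support added in this patch: gfx_v11_0_reset_kgq() and gfx_v11_0_reset_kcq() share the same shape — try an MES legacy-queue reset first, fall back to a whole-pipe reset only if that fails, then re-initialize the MQD and re-map the queue before completing the timed-out fence. Below is a condensed sketch of that flow; the helper names are the ones added in the hunks above, while the combined wrapper itself is illustrative and not part of the patch.

/* Illustrative only -- a condensed restatement of the queue reset flow
 * implemented by gfx_v11_0_reset_kgq()/gfx_v11_0_reset_kcq() above; this
 * wrapper is not part of the patch.
 */
static int gfx_v11_0_reset_queue_sketch(struct amdgpu_ring *ring,
					unsigned int vmid, bool is_compute,
					struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	/* Preferred path: ask MES to reset just the hung legacy queue. */
	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, is_compute);
	if (r) {
		/* Heavier fallback: reset the whole ME/MEC pipe. */
		r = is_compute ? gfx_v11_0_reset_compute_pipe(ring) :
				 gfx_v11_reset_gfx_pipe(ring);
		if (r)
			return r;
	}

	/* The queue state is gone either way: rebuild the MQD ... */
	r = is_compute ? gfx_v11_0_kcq_init_queue(ring, true) :
			 gfx_v11_0_kgq_init_queue(ring, true);
	if (r)
		return r;

	/* ... and hand the queue back to MES before signalling the fence. */
	r = amdgpu_mes_map_legacy_queue(adev, ring);
	if (r)
		return r;

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}

Note that both pipe-reset helpers currently bail out early with -EOPNOTSUPP via gfx_v11_pipe_reset_support(), so in practice only the MES path is exercised until CP firmware support for pipe reset lands.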
