summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c2242
1 files changed, 1691 insertions, 551 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 5c3db694afa8..8a2ee2de390f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -29,7 +29,6 @@
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
-#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"
@@ -42,13 +41,15 @@
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
#include "soc15.h"
-#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
+#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
#define GFX11_NUM_GFX_RINGS 1
#define GFX11_MEC_HPD_SIZE 2048
@@ -60,11 +61,32 @@
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
+#define regPC_CONFIG_CNTL_1 0x194d
+#define regPC_CONFIG_CNTL_1_BASE_IDX 1
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
@@ -82,6 +104,200 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
@@ -130,13 +346,20 @@ static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -265,7 +488,10 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
soc15_program_register_sequence(adev,
@@ -275,6 +501,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
default:
break;
}
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
}
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
@@ -310,6 +540,21 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
amdgpu_ring_write(ring, inv); /* poll interval */
}
+static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -358,7 +603,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned index;
uint64_t gpu_addr;
- volatile uint32_t *cpu_ptr;
+ uint32_t *cpu_ptr;
long r;
/* MES KIQ fw hasn't indirect buffer support for now */
@@ -368,33 +613,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -421,12 +651,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -444,10 +672,10 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *
{
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- char fw_name[40];
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", ucode_prefix);
if (err)
goto out;
@@ -465,7 +693,7 @@ out:
static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
@@ -486,8 +714,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
- char fw_name[40];
- char ucode_prefix[30];
+ char ucode_prefix[25];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -496,9 +723,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
@@ -514,8 +741,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", ucode_prefix);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -527,8 +755,19 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
}
if (!amdgpu_sriov_vf(adev)) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+ adev->pdev->revision == 0xCE)
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", ucode_prefix);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
@@ -539,8 +778,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", ucode_prefix);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -561,6 +801,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.mec2_fw = NULL;
gfx_v11_0_check_fw_cp_gfx_shadow(adev);
+
+ if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+ err = adev->gfx.imu.funcs->init_microcode(adev);
+ if (err)
+ DRM_ERROR("Failed to init imu firmware!\n");
+ return err;
+ }
+
out:
if (err) {
amdgpu_ucode_release(&adev->gfx.pfp_fw);
@@ -602,12 +850,9 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -615,39 +860,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
@@ -696,7 +917,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -828,14 +1049,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
#define MQD_FWWORKAREA_SIZE 484
#define MQD_FWWORKAREA_ALIGNMENT 256
-static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info)
{
- if (adev->gfx.cp_gfx_shadow) {
- shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
- shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
- shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
- shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
return 0;
} else {
memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
@@ -856,8 +1084,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
-
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
adev->gfx.config.max_hw_contexts = 8;
@@ -876,6 +1103,10 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -893,9 +1124,9 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
- int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -905,6 +1136,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->ring_obj = NULL;
ring->use_doorbell = true;
+ if (adev->gfx.disable_kq) {
+ ring->no_scheduler = true;
+ ring->no_user_submission = true;
+ }
if (!ring_id)
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
@@ -914,11 +1149,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
- r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
- if (r)
- return r;
- return 0;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
}
static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@@ -1295,30 +1529,69 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v11_0_sw_init(void *handle)
+static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- int i, j, k, r, ring_id = 0;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, j, k, r, ring_id;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
- adev->gfxhub.funcs->init(adev);
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
- adev->gfx.mec.num_mec = 2;
- adev->gfx.mec.num_pipe_per_mec = 4;
- adev->gfx.mec.num_queue_per_pipe = 4;
- break;
- case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -1333,9 +1606,118 @@ static int gfx_v11_0_sw_init(void *handle)
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2420 &&
+ adev->gfx.pfp_fw_version >= 2580 &&
+ adev->gfx.mec_fw_version >= 2650 &&
+ adev->mes.fw_version[0] >= 120) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* add firmware version checks here */
+ if (0 && !adev->gfx.disable_uq) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 2280 &&
+ adev->gfx.pfp_fw_version >= 2370 &&
+ adev->gfx.mec_fw_version >= 2450 &&
+ adev->mes.fw_version[0] >= 99) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.pfp_fw_version >= 102 &&
+ adev->gfx.mec_fw_version >= 66 &&
+ adev->mes.fw_version[0] >= 128) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 26 &&
+ adev->mes.fw_version[0] >= 114) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 12 &&
+ adev->gfx.pfp_fw_version >= 15 &&
+ adev->gfx.mec_fw_version >= 15) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 7 &&
+ adev->gfx.pfp_fw_version >= 8 &&
+ adev->gfx.mec_fw_version >= 8) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
- amdgpu_sriov_is_pp_one_vf(adev))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
/* EOP Event */
@@ -1345,6 +1727,13 @@ static int gfx_v11_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
@@ -1368,14 +1757,6 @@ static int gfx_v11_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- if (adev->gfx.imu.funcs) {
- if (adev->gfx.imu.funcs->init_microcode) {
- r = adev->gfx.imu.funcs->init_microcode(adev);
- if (r)
- DRM_ERROR("Failed to load imu firmware!\n");
- }
- }
-
gfx_v11_0_me_init(adev);
r = gfx_v11_0_rlc_init(adev);
@@ -1390,41 +1771,71 @@ static int gfx_v11_0_sw_init(void *handle)
return r;
}
- /* set up the gfx ring */
- for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
- continue;
-
- r = gfx_v11_0_gfx_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
- ring_id++;
+ if (adev->gfx.num_gfx_rings) {
+ ring_id = 0;
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
}
}
}
- ring_id = 0;
- /* set up the compute queues - allocate horizontally across pipes */
- for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
- for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
- k, j))
- continue;
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
- r = gfx_v11_0_compute_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
- ring_id++;
+ ring_id++;
+ }
}
}
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 2280) &&
+ (adev->gfx.mec_fw_version >= 2410) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ }
+
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
if (r) {
@@ -1432,8 +1843,7 @@ static int gfx_v11_0_sw_init(void *handle)
return r;
}
- kiq = &adev->gfx.kiq[0];
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
}
@@ -1458,6 +1868,12 @@ static int gfx_v11_0_sw_init(void *handle)
return -EINVAL;
}
+ gfx_v11_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -1490,10 +1906,10 @@ static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.rlc_autoload_ptr);
}
-static int gfx_v11_0_sw_fini(void *handle)
+static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -1507,6 +1923,8 @@ static int gfx_v11_0_sw_fini(void *handle)
amdgpu_gfx_kiq_fini(adev, 0);
}
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v11_0_pfp_fini(adev);
gfx_v11_0_me_fini(adev);
gfx_v11_0_rlc_fini(adev);
@@ -1517,6 +1935,12 @@ static int gfx_v11_0_sw_fini(void *handle)
gfx_v11_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
return 0;
}
@@ -1586,6 +2010,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
{
+ u32 rb_bitmap_per_sa;
u32 rb_bitmap_width_per_sa;
u32 max_sa;
u32 active_sa_bitmap;
@@ -1603,12 +2028,14 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
for (i = 0; i < max_sa; i++) {
if (active_sa_bitmap & (1 << i))
- active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -1647,8 +2074,10 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
@@ -1732,26 +2161,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
gfx_v11_0_init_gds_vmid(adev);
}
+static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
@@ -1963,7 +2440,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
if (version_minor == 3)
gfx_v11_0_load_rlcp_rlcv_microcode(adev);
}
-
+
return 0;
}
@@ -2033,7 +2510,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -2077,7 +2554,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -2122,7 +2599,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -2562,8 +3039,14 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
for (i = 0; i < adev->usec_timeout; i++) {
cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else
@@ -2752,7 +3235,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
@@ -2970,7 +3453,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
@@ -3256,7 +3739,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -3294,7 +3777,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
+ /* Set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -3405,7 +3888,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
memcpy(fw, fw_data, fw_size);
-
+
amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
@@ -3564,9 +4047,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
@@ -3584,6 +4065,24 @@ static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
(adev->doorbell_index.userqueue_end * 2) << 2);
}
+static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v11_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -3601,25 +4100,22 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
mqd->cp_gfx_mqd_control = tmp;
/* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
mqd->cp_gfx_hqd_quantum = tmp;
@@ -3641,16 +4137,20 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
mqd->cp_gfx_hqd_cntl = tmp;
/* set up cp_doorbell_control */
- tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -3662,21 +4162,31 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_rb_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
/* active the queue */
mqd->cp_gfx_hqd_active = 1;
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+ mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
-static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v11_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -3684,11 +4194,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
@@ -3701,22 +4211,9 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -3748,14 +4245,14 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
mqd->cp_hqd_eop_control = tmp;
/* enable doorbell? */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -3784,7 +4281,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
- tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ tmp = regCP_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
mqd->cp_mqd_control = tmp;
@@ -3794,15 +4291,20 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
+ if (prop->kernel_queue) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
@@ -3819,7 +4321,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = 0;
/* enable the doorbell if requested */
if (prop->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -3834,17 +4336,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
mqd->cp_hqd_persistent_state = tmp;
/* set MIN_IB_AVAIL_SIZE */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
@@ -3854,6 +4356,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_active = prop->hqd_active;
+ /* set UQ fenceaddress */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -3977,7 +4483,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -4000,19 +4506,19 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
}
-static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v11_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -4021,11 +4527,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
@@ -4037,57 +4543,24 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v11_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
if (!amdgpu_async_gfx_ring)
gfx_v11_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
@@ -4140,11 +4613,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
return r;
}
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
+ if (adev->gfx.disable_kq) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we don't want to set ring->ready */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+ }
+ if (amdgpu_async_gfx_ring)
+ amdgpu_gfx_disable_kgq(adev, 0);
+ } else {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
@@ -4172,13 +4657,14 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
if (r)
return r;
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
- value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
- false : true;
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
adev->gfxhub.funcs->set_fault_enable_default(adev, value);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+ /* TODO investigate why this and the hdp flush above is needed,
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
return 0;
}
@@ -4251,21 +4737,24 @@ static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}
-static int gfx_v11_0_hw_init(void *handle)
+static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
if (adev->gfx.imu.funcs) {
/* RLC autoload sequence 1: Program rlc ram */
if (adev->gfx.imu.funcs->program_rlc_ram)
adev->gfx.imu.funcs->program_rlc_ram(adev);
+ /* rlc autoload firmware */
+ r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
}
- /* rlc autoload firmware */
- r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
- if (r)
- return r;
} else {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
@@ -4314,11 +4803,9 @@ static int gfx_v11_0_hw_init(void *handle)
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (!(adev->flags & AMD_IS_APU)) {
- r = amdgpu_pm_load_smu_firmware(adev, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
}
gfx_v11_0_constants_init(adev);
@@ -4343,18 +4830,70 @@ static int gfx_v11_0_hw_init(void *handle)
if (r)
return r;
+ /* get IMU version from HW if it's not set */
+ if (!adev->gfx.imu_fw_version)
+ adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
+
return r;
}
-static int gfx_v11_0_hw_fini(void *handle)
+static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v11_0_set_userq_eop_interrupts(adev, false);
if (!adev->no_hw_access) {
- if (amdgpu_async_gfx_ring) {
+ if (amdgpu_async_gfx_ring &&
+ !adev->gfx.disable_kq) {
if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
@@ -4383,19 +4922,19 @@ static int gfx_v11_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v11_0_suspend(void *handle)
+static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v11_0_hw_fini(handle);
+ return gfx_v11_0_hw_fini(ip_block);
}
-static int gfx_v11_0_resume(void *handle)
+static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v11_0_hw_init(handle);
+ return gfx_v11_0_hw_init(ip_block);
}
-static bool gfx_v11_0_is_idle(void *handle)
+static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -4404,11 +4943,11 @@ static bool gfx_v11_0_is_idle(void *handle)
return true;
}
-static int gfx_v11_0_wait_for_idle(void *handle)
+static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4422,12 +4961,46 @@ static int gfx_v11_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v11_0_soft_reset(void *handle)
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- int i, j, k;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r, i, j, k;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
@@ -4436,16 +5009,11 @@ static int gfx_v11_0_soft_reset(void *handle)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- gfx_v11_0_set_safe_mode(adev, 0);
-
+ mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
- WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+ soc21_grbm_select(adev, i, k, j, 0);
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
@@ -4455,16 +5023,23 @@ static int gfx_v11_0_soft_reset(void *handle)
for (i = 0; i < adev->gfx.me.num_me; ++i) {
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
- WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+ soc21_grbm_select(adev, i, k, j, 0);
WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
}
}
}
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ r = gfx_v11_0_request_gfx_index_mutex(adev, true);
+ if (r) {
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+ return r;
+ }
WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
@@ -4474,6 +5049,14 @@ static int gfx_v11_0_soft_reset(void *handle)
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ /* release the gfx mutex */
+ r = gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ if (r) {
+ DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+ return r;
+ }
+
for (i = 0; i < adev->usec_timeout; i++) {
if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
!RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
@@ -4536,15 +5119,15 @@ static int gfx_v11_0_soft_reset(void *handle)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- gfx_v11_0_unset_safe_mode(adev, 0);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return gfx_v11_0_cp_resume(adev);
}
-static bool gfx_v11_0_check_soft_reset(void *handle)
+static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
long tmo = msecs_to_jiffies(1000);
@@ -4565,12 +5148,13 @@ static bool gfx_v11_0_check_soft_reset(void *handle)
return false;
}
-static int gfx_v11_0_post_soft_reset(void *handle)
+static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
/**
* GFX soft reset will impact MES, need resume MES when do GFX soft reset
*/
- return amdgpu_mes_resume((struct amdgpu_device *)handle);
+ return amdgpu_mes_resume(adev);
}
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
@@ -4631,15 +5215,40 @@ static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v11_0_early_init(void *handle)
+static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
- adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
- AMDGPU_MAX_COMPUTE_RINGS);
+ if (adev->gfx.disable_kq) {
+ /* We need one GFX ring temporarily to set up
+ * the clear state.
+ */
+ adev->gfx.num_gfx_rings = 1;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
gfx_v11_0_set_kiq_pm4_funcs(adev);
gfx_v11_0_set_ring_funcs(adev);
@@ -4654,9 +5263,9 @@ static int gfx_v11_0_early_init(void *handle)
return gfx_v11_0_init_microcode(adev);
}
-static int gfx_v11_0_late_init(void *handle)
+static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4667,6 +5276,14 @@ static int gfx_v11_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
return 0;
}
@@ -4951,27 +5568,35 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
- u32 reg, data;
+ u32 reg, pre_data, data;
amdgpu_gfx_off_ctrl(adev, false);
-
reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
-
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+ }
amdgpu_gfx_off_ctrl(adev, true);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && (pre_data != data)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
}
static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
@@ -5001,9 +5626,13 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
// Program RLC_PG_DELAY3 for CGPG hysteresis
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
break;
default:
@@ -5021,16 +5650,16 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
-static int gfx_v11_0_set_powergating_state(void *handle,
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
@@ -5038,6 +5667,10 @@ static int gfx_v11_0_set_powergating_state(void *handle,
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
@@ -5054,20 +5687,24 @@ static int gfx_v11_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v11_0_set_clockgating_state(void *handle,
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -5078,9 +5715,9 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_MGCG */
@@ -5144,45 +5781,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always being used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
- upper_32_bits(ring->wptr));
- }
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
@@ -5207,42 +5816,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG(); /* only DOORBELL method supported on gfx11 now */
- }
+ BUG(); /* only DOORBELL method supported on gfx11 now */
}
}
@@ -5265,7 +5846,7 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -5283,8 +5864,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
- BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
-
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
control |= ib->length_dw | (vmid << 24);
@@ -5295,15 +5874,11 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
if (flags & AMDGPU_IB_PREEMPTED)
control |= INDIRECT_BUFFER_PRE_RESUME(1);
- if (vmid)
+ if (vmid && !ring->adev->gfx.rs64_enable)
gfx_v11_0_ring_emit_de_meta(ring,
- (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+ !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
}
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -5323,10 +5898,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@@ -5364,11 +5935,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
PACKET3_RELEASE_MEM_GCR_GL2_WB |
- PACKET3_RELEASE_MEM_GCR_GL2_INV |
- PACKET3_RELEASE_MEM_GCR_GL2_US |
- PACKET3_RELEASE_MEM_GCR_GL1_INV |
- PACKET3_RELEASE_MEM_GCR_GLV_INV |
- PACKET3_RELEASE_MEM_GCR_GLM_INV |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
PACKET3_RELEASE_MEM_GCR_GLM_WB |
PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
@@ -5388,8 +5955,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -5417,10 +5983,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -5428,6 +5991,12 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
}
+
+ /* Make sure that we can't skip the SET_Q_MODE packets when the VM
+ * changed in any way.
+ */
+ ring->set_q_mode_offs = 0;
+ ring->set_q_mode_ptr = NULL;
}
static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
@@ -5477,16 +6046,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
u64 shadow_va, u64 csa_va,
u64 gds_va, bool init_shadow,
int vmid)
{
struct amdgpu_device *adev = ring->adev;
+ unsigned int offs, end;
- if (!adev->gfx.cp_gfx_shadow)
+ if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
return;
+ /*
+ * The logic here isn't easy to understand because we need to keep state
+ * accross multiple executions of the function as well as between the
+ * CPU and GPU. The general idea is that the newly written GPU command
+ * has a condition on the previous one and only executed if really
+ * necessary.
+ */
+
+ /*
+ * The dw in the NOP controls if the next SET_Q_MODE packet should be
+ * executed or not. Reserve 64bits just to be on the save side.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+ offs = ring->wptr & ring->buf_mask;
+
+ /*
+ * We start with skipping the prefix SET_Q_MODE and always executing
+ * the postfix SET_Q_MODE packet. This is changed below with a
+ * WRITE_DATA command when the postfix executed.
+ */
+ amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+ amdgpu_ring_write(ring, 0);
+
+ if (ring->set_q_mode_offs) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += ring->set_q_mode_offs << 2;
+ end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+ }
+
+ /*
+ * When the postfix SET_Q_MODE packet executes we need to make sure that the
+ * next prefix SET_Q_MODE packet executes as well.
+ */
+ if (!shadow_va) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += offs << 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 0x1);
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
amdgpu_ring_write(ring, lower_32_bits(shadow_va));
amdgpu_ring_write(ring, upper_32_bits(shadow_va));
@@ -5498,33 +6132,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
amdgpu_ring_write(ring, init_shadow ?
PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
-}
-static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
-{
- unsigned ret;
+ if (ring->set_q_mode_offs)
+ amdgpu_ring_patch_cond_exec(ring, end);
- amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
- ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ if (shadow_va) {
+ uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
- return ret;
-}
+ /*
+ * If the tokens match try to skip the last postfix SET_Q_MODE
+ * packet to avoid saving/restoring the state all the time.
+ */
+ if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+ *ring->set_q_mode_ptr = 0;
-static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
+ ring->set_q_mode_token = token;
+ } else {
+ ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+ }
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+ ring->set_q_mode_offs = offs;
}
static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
@@ -5535,6 +6162,9 @@ static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
+ if (adev->enable_mes)
+ return -EINVAL;
+
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -5582,28 +6212,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -5690,19 +6305,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask, 0x20);
}
-static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- WREG32_SOC15(GC, 0, regSQ_CMD, value);
-}
-
static void
gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5840,25 +6442,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int i;
+ u32 doorbell_offset = entry->src_data[0];
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
+ int i;
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -5893,15 +6493,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5910,17 +6537,75 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5940,27 +6625,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
- drm_sched_fault(&ring->sched);
- }
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
}
- break;
- default:
- BUG();
- break;
}
}
@@ -5973,6 +6660,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6059,6 +6755,426 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully support it.*/
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+
+static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
+ * so the pipe reset status relies on the later gfx ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v11_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
+ * reset status relies on the compute ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r = 0;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v11_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_11_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i &&
+ gc_cp_reg_list_11[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0,
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc21_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.name = "gfx_v11_0",
.early_init = gfx_v11_0_early_init,
@@ -6077,6 +7193,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,
+ .dump_ip_state = gfx_v11_ip_dump,
+ .print_ip_state = gfx_v11_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
@@ -6088,13 +7206,14 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
- .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ .emit_frame_size = /* totally 247 maximum if 16 IBs */
+ 5 + /* update_spm_vmid */
5 + /* COND_EXEC */
- 9 + /* SET_Q_PREEMPTION_MODE */
+ 22 + /* SET_Q_PREEMPTION_MODE */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
5 + /* COND_EXEC */
@@ -6103,8 +7222,10 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
+ 22 + /* SET_Q_PREEMPTION_MODE */
8 + 8 + /* FENCE x2 */
- 8, /* gfx_v11_0_emit_mem_sync */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
.emit_fence = gfx_v11_0_ring_emit_fence,
@@ -6114,19 +7235,21 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
.test_ring = gfx_v11_0_ring_test_ring,
.test_ib = gfx_v11_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v11_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v11_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v11_0_ring_soft_recovery,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
@@ -6138,6 +7261,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.get_wptr = gfx_v11_0_ring_get_wptr_compute,
.set_wptr = gfx_v11_0_ring_set_wptr_compute,
.emit_frame_size =
+ 5 + /* update_spm_vmid */
20 + /* gfx_v11_0_ring_emit_gds_switch */
7 + /* gfx_v11_0_ring_emit_hdp_flush */
5 + /* hdp invalidate */
@@ -6146,7 +7270,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v11_0_emit_mem_sync */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
.emit_fence = gfx_v11_0_ring_emit_fence,
@@ -6156,12 +7281,16 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
.test_ring = gfx_v11_0_ring_test_ring,
.test_ib = gfx_v11_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v11_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
@@ -6179,7 +7308,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
7 + /* gfx_v11_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
@@ -6192,6 +7320,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
};
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -6217,6 +7346,11 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
.process = gfx_v11_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
+ .set = gfx_v11_0_set_bad_op_fault_state,
+ .process = gfx_v11_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
.set = gfx_v11_0_set_priv_inst_fault_state,
.process = gfx_v11_0_priv_inst_irq,
@@ -6234,6 +7368,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
@@ -6345,6 +7482,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
mask = 1;
counter = 0;
gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
@@ -6368,7 +7508,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
* SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
* SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
*/
- cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask)