summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c2918
1 files changed, 1710 insertions, 1208 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index dbe7442fb25c..d75b9940f248 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,11 +40,11 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -53,13 +53,9 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
-#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
#define GFX10_MEC_HPD_SIZE 2048
-#define RLCG_VFGATE_DISABLED 0x4000000
-#define RLCG_WRONG_OPERATION_TYPE 0x2000000
-#define RLCG_NOT_IN_RANGE 0x1000000
-
#define F32_CE_PROGRAM_RAM_SIZE 65536
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -106,10 +102,21 @@
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish 0x0105
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish 0x0106
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish_BASE_IDX 1
+
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1
+
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1
+
#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441
#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1
#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261
@@ -180,14 +187,6 @@
#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5
#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1
-#define GFX_RLCG_GC_WRITE_OLD (0x8 << 28)
-#define GFX_RLCG_GC_WRITE (0x0 << 28)
-#define GFX_RLCG_GC_READ (0x1 << 28)
-#define GFX_RLCG_MMHUB_WRITE (0x2 << 28)
-
-#define RLCG_ERROR_REPORT_ENABLED(adev) \
- (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
-
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -256,13 +255,6 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin");
-MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin");
-
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
@@ -270,8 +262,225 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin");
-static const struct soc15_reg_golden golden_settings_gc_10_1[] =
-{
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
@@ -314,13 +523,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -1375,8 +1582,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
@@ -1417,8 +1623,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
@@ -1463,150 +1668,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
};
-static bool gfx_v10_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip,
- int write, u32 *rlcg_flag)
-{
- switch (hwip) {
- case GC_HWIP:
- if (amdgpu_sriov_reg_indirect_gc(adev)) {
- *rlcg_flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
-
- return true;
- /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */
- } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ)) {
- *rlcg_flag = GFX_RLCG_GC_WRITE_OLD;
-
- return true;
- }
-
- break;
- case MMHUB_HWIP:
- if (amdgpu_sriov_reg_indirect_mmhub(adev) &&
- (acc_flags & AMDGPU_REGS_RLC) && write) {
- *rlcg_flag = GFX_RLCG_MMHUB_WRITE;
- return true;
- }
-
- break;
- default:
- DRM_DEBUG("Not program register by RLCG\n");
- }
-
- return false;
-}
-
-static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)
-{
- static void *scratch_reg0;
- static void *scratch_reg1;
- static void *scratch_reg2;
- static void *scratch_reg3;
- static void *spare_int;
- static uint32_t grbm_cntl;
- static uint32_t grbm_idx;
- uint32_t i = 0;
- uint32_t retries = 50000;
- u32 ret = 0;
- u32 tmp;
-
- scratch_reg0 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4;
- scratch_reg1 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1) * 4;
- scratch_reg2 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4;
- scratch_reg3 = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4;
-
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
- spare_int = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX]
- + mmRLC_SPARE_INT_0_Sienna_Cichlid) * 4;
- } else {
- spare_int = adev->rmmio +
- (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4;
- }
-
- grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
- grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
-
- if (offset == grbm_cntl || offset == grbm_idx) {
- if (offset == grbm_cntl)
- writel(v, scratch_reg2);
- else if (offset == grbm_idx)
- writel(v, scratch_reg3);
-
- writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
- } else {
- writel(v, scratch_reg0);
- writel(offset | flag, scratch_reg1);
- writel(1, spare_int);
-
- for (i = 0; i < retries; i++) {
- tmp = readl(scratch_reg1);
- if (!(tmp & flag))
- break;
-
- udelay(10);
- }
-
- if (i >= retries) {
- if (RLCG_ERROR_REPORT_ENABLED(adev)) {
- if (tmp & RLCG_VFGATE_DISABLED)
- pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset);
- else if (tmp & RLCG_WRONG_OPERATION_TYPE)
- pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset);
- else if (tmp & RLCG_NOT_IN_RANGE)
- pr_err("The register is not in range, program reg:0x%05x failed!\n", offset);
- else
- pr_err("Unknown error type, program reg:0x%05x failed!\n", offset);
- } else
- pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
- }
- }
-
- ret = readl(scratch_reg0);
-
- return ret;
-}
-
-static void gfx_v10_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip)
-{
- u32 rlcg_flag;
-
- if (!amdgpu_sriov_runtime(adev) &&
- gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) {
- gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag);
- return;
- }
-
- if (acc_flags & AMDGPU_REGS_NO_KIQ)
- WREG32_NO_KIQ(offset, value);
- else
- WREG32(offset, value);
-}
-
-static u32 gfx_v10_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip)
-{
- u32 rlcg_flag;
-
- if (!amdgpu_sriov_runtime(adev) &&
- gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag))
- return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag);
-
- if (acc_flags & AMDGPU_REGS_NO_KIQ)
- return RREG32_NO_KIQ(offset);
- else
- return RREG32(offset);
-}
-
-static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -2229,13 +2295,11 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
@@ -3290,8 +3354,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
@@ -3300,7 +3363,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
- SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
@@ -3337,13 +3400,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = {
/* Pending on emulation bring up */
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
@@ -3390,8 +3451,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
@@ -3421,15 +3481,14 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_3[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
@@ -3445,8 +3504,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
};
-static const struct soc15_reg_golden golden_settings_gc_10_3_4[] =
-{
+static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100),
@@ -3516,7 +3574,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX,0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
@@ -3557,6 +3615,56 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
};
+static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -3570,11 +3678,12 @@ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
+ u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
@@ -3587,16 +3696,29 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
-
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -3604,10 +3726,23 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
@@ -3680,12 +3815,59 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub)
{
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(kiq_ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+ gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* enter save mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
@@ -3694,6 +3876,7 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -3703,12 +3886,12 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs;
}
static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_rlc_spm_10_0_nv10,
@@ -3731,7 +3914,10 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_10_1,
@@ -3781,8 +3967,8 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
break;
case IP_VERSION(10, 3, 4):
soc15_program_register_sequence(adev,
- golden_settings_gc_10_3_4,
- (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
+ golden_settings_gc_10_3_4,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
break;
case IP_VERSION(10, 3, 5):
soc15_program_register_sequence(adev,
@@ -3790,23 +3976,27 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_5));
break;
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
soc15_program_register_sequence(adev,
golden_settings_gc_10_0_cyan_skillfish,
(const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish));
break;
+ case IP_VERSION(10, 3, 6):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_6,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6));
+ break;
+ case IP_VERSION(10, 3, 7):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_7,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7));
+ break;
default:
break;
}
gfx_v10_0_init_spm_golden_registers(adev);
}
-static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
@@ -3843,29 +4033,22 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
- unsigned i;
+ unsigned int i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
- return r;
- }
-
WREG32(scratch, 0xCAFEDEAD);
-
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -3882,8 +4065,6 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- amdgpu_gfx_scratch_free(adev, scratch);
-
return r;
}
@@ -3892,22 +4073,26 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- unsigned index;
+ unsigned int index;
uint64_t gpu_addr;
- uint32_t tmp;
+ uint32_t *cpu_ptr;
long r;
+ memset(&ib, 0, sizeof(ib));
+
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
- if (r)
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
+ }
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
@@ -3928,13 +4113,12 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err2;
}
- tmp = adev->wb.wb[index];
- if (tmp == 0xDEADBEEF)
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -3943,18 +4127,12 @@ err1:
static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
{
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
kfree(adev->gfx.rlc.register_list_format);
}
@@ -3963,11 +4141,12 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.cp_fw_write_wait = false;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
if ((adev->gfx.me_fw_version >= 0x00000046) &&
(adev->gfx.me_feature_version >= 27) &&
(adev->gfx.pfp_fw_version >= 0x00000068) &&
@@ -3981,7 +4160,9 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.cp_fw_write_wait = true;
break;
default:
@@ -3992,39 +4173,6 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
DRM_WARN_ONCE("CP firmware version too old, please update!");
}
-
-static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
-static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_2 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
- adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
- adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
- adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
-}
-
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@@ -4039,12 +4187,12 @@ static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
break;
}
- return ret ;
+ return ret;
}
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
@@ -4056,300 +4204,87 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
- char fw_name[40];
- char *wks = "";
+ char fw_name[53];
+ char ucode_prefix[30];
+ const char *wks = "";
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
DRM_DEBUG("\n");
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 1, 10):
- chip_name = "navi10";
- break;
- case IP_VERSION(10, 1, 1):
- chip_name = "navi14";
- if (!(adev->pdev->device == 0x7340 &&
- adev->pdev->revision != 0x00))
- wks = "_wks";
- break;
- case IP_VERSION(10, 1, 2):
- chip_name = "navi12";
- break;
- case IP_VERSION(10, 3, 0):
- chip_name = "sienna_cichlid";
- break;
- case IP_VERSION(10, 3, 2):
- chip_name = "navy_flounder";
- break;
- case IP_VERSION(10, 3, 1):
- chip_name = "vangogh";
- break;
- case IP_VERSION(10, 3, 4):
- chip_name = "dimgrey_cavefish";
- break;
- case IP_VERSION(10, 3, 5):
- chip_name = "beige_goby";
- break;
- case IP_VERSION(10, 3, 3):
- chip_name = "yellow_carp";
- break;
- case IP_VERSION(10, 1, 3):
- if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
- chip_name = "cyan_skillfish2";
- else
- chip_name = "cyan_skillfish";
- break;
- default:
- BUG();
- }
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) &&
+ (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+ wks = "_wks";
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+
+ /* don't validate this firmware. There are apparently firmwares
+ * in the wild with incorrect size in the header
+ */
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (version_major == 2) {
- if (version_minor >= 1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
- if (version_minor == 2)
- gfx_v10_0_init_rlc_iram_dram_microcode(adev);
- }
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
- err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
- if (err)
- goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
err = 0;
adev->gfx.mec2_fw = NULL;
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- if (info->fw) {
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- if (adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
-
- if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
- adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
- }
- }
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
- }
-
gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
- dev_err(adev->dev,
- "gfx10: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->gfx.pfp_fw);
- adev->gfx.pfp_fw = NULL;
- release_firmware(adev->gfx.me_fw);
- adev->gfx.me_fw = NULL;
- release_firmware(adev->gfx.ce_fw);
- adev->gfx.ce_fw = NULL;
- release_firmware(adev->gfx.rlc_fw);
- adev->gfx.rlc_fw = NULL;
- release_firmware(adev->gfx.mec_fw);
- adev->gfx.mec_fw = NULL;
- release_firmware(adev->gfx.mec2_fw);
- adev->gfx.mec2_fw = NULL;
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
}
gfx_v10_0_check_gfxoff_flag(adev);
@@ -4387,12 +4322,9 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4400,39 +4332,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4448,6 +4356,30 @@ static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid);
+ break;
+ default:
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ break;
+ }
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
{
const struct cs_section_def *cs_data;
@@ -4464,10 +4396,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- /* init spm vmid with 0xf */
- if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
-
return 0;
}
@@ -4477,19 +4405,11 @@ static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
-static int gfx_v10_0_me_init(struct amdgpu_device *adev)
+static void gfx_v10_0_me_init(struct amdgpu_device *adev)
{
- int r;
-
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
amdgpu_gfx_graphics_queue_acquire(adev);
-
- r = gfx_v10_0_init_microcode(adev);
- if (r)
- DRM_ERROR("Failed to load gfx firmware!\n");
-
- return r;
}
static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
@@ -4497,13 +4417,13 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
int r;
u32 *hpd;
const __le32 *fw_data = NULL;
- unsigned fw_size;
+ unsigned int fw_size;
u32 *fw = NULL;
size_t mec_hpd_size;
const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -4575,11 +4495,12 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
* field when performing a select_se_sh so it should be
- * zero here */
+ * zero here
+ */
WARN_ON(simd != 0);
/* type 2 wave data */
@@ -4602,7 +4523,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4613,7 +4534,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
dst);
}
-static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -4624,7 +4545,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
nv_grbm_select(adev, me, pipe, q, vm);
}
@@ -4660,9 +4581,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
-
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -4678,7 +4597,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -4689,6 +4610,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
break;
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -4727,9 +4649,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
- int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -4744,21 +4666,20 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
else
ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
- r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
- if (r)
- return r;
- return 0;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
- int r;
- unsigned irq_type;
+ unsigned int irq_type;
struct amdgpu_ring *ring;
unsigned int hw_prio;
@@ -4774,36 +4695,78 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX10_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
- r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
- if (r)
- return r;
+}
- return 0;
+static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
}
-static int gfx_v10_0_sw_init(void *handle)
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -4813,10 +4776,12 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4830,11 +4795,79 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
- &adev->gfx.kiq.irq);
+ &adev->gfx.kiq[0].irq);
if (r)
return r;
@@ -4845,6 +4878,13 @@ static int gfx_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -4859,16 +4899,16 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- gfx_v10_0_scratch_init(adev);
-
- r = gfx_v10_0_me_init(adev);
- if (r)
- return r;
+ gfx_v10_0_me_init(adev);
- r = gfx_v10_0_rlc_init(adev);
- if (r) {
- DRM_ERROR("Failed to init rlc BOs!\n");
- return r;
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
}
r = gfx_v10_0_mec_init(adev);
@@ -4879,7 +4919,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4898,8 +4938,8 @@ static int gfx_v10_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
- j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v10_0_compute_ring_init(adev, ring_id,
@@ -4912,18 +4952,27 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@@ -4938,6 +4987,12 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
+ gfx_v10_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -4962,19 +5017,22 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_ptr);
}
-static int gfx_v10_0_sw_fini(void *handle)
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@@ -4986,12 +5044,17 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -5046,17 +5109,21 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v10_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
@@ -5073,8 +5140,9 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
uint32_t pa_sc_tile_steering_override;
/* for ASICs that integrates GFX v10.3
- * pa_sc_tile_steering_override should be set to 0 */
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ * pa_sc_tile_steering_override should be set to 0
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
return 0;
/* init num_sc */
@@ -5103,6 +5171,29 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@@ -5126,14 +5217,19 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
}
+
+ gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+ AMDGPU_NUM_VMID);
}
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
@@ -5185,7 +5281,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
/*
* Set corresponding TCP bits for the inactive WGPs in
@@ -5218,7 +5314,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -5227,7 +5323,7 @@ static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
/* TCCs are global (not instanced). */
uint32_t tcc_disable;
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) {
tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) |
RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3);
} else {
@@ -5245,7 +5341,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
@@ -5256,7 +5353,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
nv_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -5277,26 +5374,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
}
+static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
@@ -5304,7 +5449,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
@@ -5360,8 +5505,10 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
{
- /* TODO: enable rlc & smu handshake until smu
- * and gfxoff feature works as expected */
+ /*
+ * TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected
+ */
if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
@@ -5384,7 +5531,7 @@ static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_0 *hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
if (!adev->gfx.rlc_fw)
return -EINVAL;
@@ -5421,6 +5568,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
gfx_v10_0_init_csb(adev);
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
} else {
@@ -5451,6 +5600,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
gfx_v10_0_init_csb(adev);
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
adev->gfx.rlc.funcs->start(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -5459,6 +5610,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
}
+
return 0;
}
@@ -5926,11 +6078,13 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
- } else {
+ else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- }
+
+ if (amdgpu_in_reset(adev) && !enable)
+ return 0;
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
@@ -5949,7 +6103,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *pfp_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -5998,7 +6152,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -6027,7 +6181,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *ce_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -6076,7 +6230,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -6104,7 +6258,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
int r;
const struct gfx_firmware_header_v1_0 *me_hdr;
const __le32 *fw_data;
- unsigned i, fw_size;
+ unsigned int i, fw_size;
uint32_t tmp;
uint32_t usec_timeout = 50000; /* wait for 50ms */
@@ -6153,7 +6307,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6315,13 +6469,15 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
}
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
@@ -6346,7 +6502,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
- u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
@@ -6373,13 +6528,13 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6411,12 +6566,12 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ /* Set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6441,24 +6596,21 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v10_0_cp_gfx_start(adev);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-
return 0;
}
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
if (enable) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
break;
default:
@@ -6466,13 +6618,15 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
break;
}
} else {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
@@ -6483,7 +6637,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
break;
}
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -6492,7 +6646,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
const struct gfx_firmware_header_v1_0 *mec_hdr;
const __le32 *fw_data;
- unsigned i;
+ unsigned int i;
u32 tmp;
u32 usec_timeout = 50000; /* Wait for 50 ms */
@@ -6528,7 +6682,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6564,35 +6718,51 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* tell RLC which is KIQ queue */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
-static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_gfx_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr;
uint32_t tmp;
uint32_t rb_bufsz;
@@ -6602,8 +6772,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_wptr_hi = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
@@ -6617,11 +6787,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
@@ -6629,23 +6796,23 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
mqd->cp_gfx_hqd_quantum = tmp;
/* set up gfx hqd base. this is similar as CP_RB_BASE */
- hqd_gpu_addr = ring->gpu_addr >> 8;
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_gfx_hqd_base = hqd_gpu_addr;
mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
mqd->cp_gfx_hqd_rptr_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up rb_wptr_poll addr */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
- rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
@@ -6656,9 +6823,9 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
/* set up cp_doorbell_control */
tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
- if (ring->use_doorbell) {
+ if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
+ DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_EN, 1);
} else
@@ -6666,13 +6833,7 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
DOORBELL_EN, 0);
mqd->cp_rb_doorbell_control = tmp;
- /*if there are 2 gfx rings, set the lower doorbell range of the first ring,
- *otherwise the range of the second ring will override the first ring */
- if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
- gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
/* active the queue */
@@ -6681,175 +6842,71 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
return 0;
}
-#ifdef BRING_UP_DEBUG
-static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
-
- /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
-
- /* set GFX_MQD_BASE */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set GFX_MQD_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
-
- /* set GFX_HQD_VMID to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
-
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
- mqd->cp_gfx_hqd_queue_priority);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
-
- /* set GFX_HQD_BASE, similar as CP_RB_BASE */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
-
- /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
-
- /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
-
- /* set RB_WPTR_POLL_ADDR */
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
-
- /* set RB_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
-
- /* active the queue */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
-
- return 0;
-}
-#endif
-
-static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_mqd_init(ring);
-#ifdef BRING_UP_DEBUG
- gfx_v10_0_gfx_queue_init_register(ring);
-#endif
+ amdgpu_ring_init_mqd(ring);
+
+ /*
+ * if there are 2 gfx rings, set the lower doorbell
+ * range of the first ring, otherwise the range of
+ * the second ring will override the first ring
+ */
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) {
- /* reset mqd with the backup copy */
- if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
- /* reset the ring */
- ring->wptr = 0;
- adev->wb.wb[ring->wptr_offs] = 0;
- amdgpu_ring_clear_ring(ring);
-#ifdef BRING_UP_DEBUG
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_queue_init_register(ring);
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
-#endif
- } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
amdgpu_ring_clear_ring(ring);
}
return 0;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r, i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
- adev->gfx.num_gfx_rings);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
- goto done;
+ return r;
}
-#ifndef BRING_UP_DEBUG
- r = gfx_v10_0_kiq_enable_kgq(adev);
- if (r)
- goto done;
-#endif
- r = gfx_v10_0_cp_gfx_start(adev);
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-done:
- return r;
+ return gfx_v10_0_cp_gfx_start(adev);
}
-static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
- mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- mqd->cp_hqd_queue_priority =
- AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- }
- }
-}
-
-static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ struct v10_compute_mqd *mqd = m;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
@@ -6861,7 +6918,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
- eop_base_addr = ring->eop_gpu_addr >> 8;
+ eop_base_addr = prop->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -6875,9 +6932,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
/* enable doorbell? */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- if (ring->use_doorbell) {
+ if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
+ DOORBELL_OFFSET, prop->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -6892,15 +6949,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_doorbell_control = tmp;
/* disable the queue if it's active */
- ring->wptr = 0;
mqd->cp_hqd_dequeue_request = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
@@ -6908,55 +6964,38 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
- (order_base_2(ring->ring_size / 4) - 1));
+ (order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = prop->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = prop->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- tmp = 0;
- /* enable the doorbell if requested */
- if (ring->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
-
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
-
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
@@ -6972,13 +7011,10 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_ib_control = tmp;
/* set static priority for a compute queue/ring */
- gfx_v10_0_compute_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
- /* map_queues packet doesn't need activate the queue,
- * so only kiq need set this field.
- */
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- mqd->cp_hqd_active = 1;
+ mqd->cp_hqd_active = prop->hqd_active;
return 0;
}
@@ -6996,20 +7032,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
- /* write the EOP addr */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
- mqd->cp_hqd_eop_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
- mqd->cp_hqd_eop_base_addr_hi);
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
- mqd->cp_hqd_eop_control);
-
- /* enable doorbell? */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->cp_hqd_pq_doorbell_control);
-
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
@@ -7028,6 +7050,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_wptr_hi);
}
+ /* disable doorbells */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
/* set the pointer to the MQD */
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);
@@ -7097,14 +7132,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -7117,46 +7151,45 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_compute_mqd_init(ring);
+ amdgpu_ring_init_mqd(ring);
gfx_v10_0_kiq_init_register(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
}
-static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_compute_mqd_init(ring);
+ amdgpu_ring_init_mqd(ring);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
- amdgpu_ring_clear_ring(ring);
- } else {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
@@ -7165,54 +7198,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
- return r;
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -7279,9 +7282,11 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
{
uint32_t data, pattern = 0xDEADBEEF;
- /* check if mmVGT_ESGS_RING_SIZE_UMD
- * has been remapped to mmVGT_ESGS_RING_SIZE */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ /*
+ * check if mmVGT_ESGS_RING_SIZE_UMD
+ * has been remapped to mmVGT_ESGS_RING_SIZE
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 4):
@@ -7291,15 +7296,15 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD , data);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
- return false;
}
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
return true;
default:
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
@@ -7309,12 +7314,12 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
- return false;
}
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
break;
}
+
+ return false;
}
static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
@@ -7324,17 +7329,21 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return;
- /* initialize cam_index to 0
- * index will auto-inc after each data writting */
+ /*
+ * Initialize cam_index to 0
+ * index will auto-inc after each data writing
+ */
WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
@@ -7453,6 +7462,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
{
uint32_t data;
+
data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG);
data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data);
@@ -7462,25 +7472,26 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
}
-static int gfx_v10_0_hw_init(void *handle)
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/**
* For gfx 10, rlc firmware loading relies on smu firmware is
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (!(adev->flags & AMD_IS_APU)) {
- r = amdgpu_pm_load_smu_firmware(adev, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
gfx_v10_0_disable_gpa_mode(adev);
}
@@ -7498,80 +7509,56 @@ static int gfx_v10_0_hw_init(void *handle)
* init golden registers and rlc resume may override some registers,
* reconfig them here
*/
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 10) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
gfx_v10_0_tcp_harvest(adev);
r = gfx_v10_0_cp_resume(adev);
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
gfx_v10_3_program_pbb_mode(adev);
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev))
gfx_v10_3_set_power_brake_sequence(adev);
return r;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i;
+ struct amdgpu_device *adev = ip_block->adev;
- if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
- return -EINVAL;
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_gfx_rings))
- return -ENOMEM;
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
- PREEMPT_QUEUES, 0, 0);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
-static int gfx_v10_0_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
- uint32_t tmp;
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+
+ /* WA added for Vangogh asic fixing the SMU suspend failure
+ * It needs to set power gating again during gfxoff control
+ * otherwise the gfxoff disallowing will be failed to set.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
-#ifndef BRING_UP_DEBUG
if (amdgpu_async_gfx_ring) {
- r = gfx_v10_0_kiq_disable_kgq(adev);
- if (r)
+ if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
-#endif
- if (amdgpu_gfx_disable_kcq(adev))
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
}
if (amdgpu_sriov_vf(adev)) {
gfx_v10_0_cp_gfx_enable(adev, false);
- /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
- tmp &= 0xffffff00;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- } else {
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
- tmp &= 0xffffff00;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- }
-
+ /* Remove the steps of clearing KIQ position.
+ * It causes GFX hang when another Win guest is rendering.
+ */
return 0;
}
gfx_v10_0_cp_enable(adev, false);
@@ -7580,19 +7567,19 @@ static int gfx_v10_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v10_0_suspend(void *handle)
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_fini(handle);
+ return gfx_v10_0_hw_fini(ip_block);
}
-static int gfx_v10_0_resume(void *handle)
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_init(handle);
+ return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -7601,11 +7588,11 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
}
-static int gfx_v10_0_wait_for_idle(void *handle)
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- unsigned i;
+ unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -7619,11 +7606,11 @@ static int gfx_v10_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v10_0_soft_reset(void *handle)
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -7648,12 +7635,13 @@ static int gfx_v10_0_soft_reset(void *handle)
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
@@ -7680,19 +7668,17 @@ static int gfx_v10_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v10_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -7704,9 +7690,26 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock, clock_lo, clock_hi, hi_check;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
preempt_disable();
clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
@@ -7721,6 +7724,21 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
preempt_enable();
clock = clock_lo | (clock_hi << 32ULL);
break;
+ case IP_VERSION(10, 3, 6):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
default:
preempt_disable();
clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
@@ -7769,15 +7787,18 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v10_0_early_init(void *handle)
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
break;
case IP_VERSION(10, 3, 0):
@@ -7785,7 +7806,9 @@ static int gfx_v10_0_early_init(void *handle)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
break;
default:
@@ -7800,13 +7823,17 @@ static int gfx_v10_0_early_init(void *handle)
gfx_v10_0_set_irq_funcs(adev);
gfx_v10_0_set_gds_init(adev);
gfx_v10_0_set_rlc_funcs(adev);
+ gfx_v10_0_set_mqd_funcs(adev);
- return 0;
+ /* init rlcg reg access ctrl */
+ gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v10_0_init_microcode(adev);
}
-static int gfx_v10_0_late_init(void *handle)
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -7817,6 +7844,10 @@ static int gfx_v10_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
return 0;
}
@@ -7829,21 +7860,23 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}
-static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
- unsigned i;
+ unsigned int i;
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
/* wait for RLC_SAFE_MODE */
@@ -7868,18 +7901,20 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
break;
default:
@@ -8182,7 +8217,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d
mmCGTS_SA1_QUAD1_SM_CTRL_REG
};
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
tcp_ctrl_regs_nv12[i];
@@ -8213,7 +8248,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* enable FGCG firstly*/
@@ -8227,9 +8262,12 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)))
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 10)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 1)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 2)))
gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
@@ -8252,31 +8290,39 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
AMD_CG_SUPPORT_GFX_3D_CGLS))
gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
-static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
{
- u32 reg, data;
-
- amdgpu_gfx_off_ctrl(adev, false);
+ u32 reg, pre_data, data;
- /* not for *_SOC15 */
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ /* not for *_SOC15 */
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ }
+}
+
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, vmid);
amdgpu_gfx_off_ctrl(adev, true);
}
@@ -8330,9 +8376,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
* Power/performance team will optimize it and might give a new value later.
*/
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
break;
@@ -8344,11 +8392,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
gfx_v10_cntl_power_gating(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
@@ -8377,21 +8425,19 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.reset = gfx_v10_0_rlc_reset,
.start = gfx_v10_0_rlc_start,
.update_spm_vmid = gfx_v10_0_update_spm_vmid,
- .sriov_wreg = gfx_v10_sriov_wreg,
- .sriov_rreg = gfx_v10_sriov_rreg,
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -8403,8 +8449,16 @@ static int gfx_v10_0_set_powergating_state(void *handle,
break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
gfx_v10_cntl_pg(adev, enable);
- amdgpu_gfx_off_ctrl(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
break;
default:
break;
@@ -8412,15 +8466,15 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -8429,7 +8483,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -8439,9 +8495,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -8485,7 +8541,8 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
+ /* gfx10 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -8495,7 +8552,7 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
} else {
wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@@ -8510,17 +8567,21 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
+ /* gfx10 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
}
static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@@ -8529,7 +8590,7 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
else
BUG();
return wptr;
@@ -8539,9 +8600,9 @@ static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- /* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
@@ -8567,7 +8628,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -8582,7 +8643,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
uint32_t flags)
{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -8592,7 +8653,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
if (flags & AMDGPU_IB_PREEMPTED)
@@ -8619,7 +8680,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
uint32_t flags)
{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
@@ -8650,7 +8711,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
}
static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
- u64 seq, unsigned flags)
+ u64 seq, unsigned int flags)
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
@@ -8692,8 +8753,20 @@ static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
upper_32_bits(addr), seq, 0xffffffff, 4);
}
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
@@ -8743,7 +8816,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
{
uint32_t dw2 = 0;
- if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev))
+ if (ring->adev->gfx.mcbp)
gfx_v10_0_ring_emit_ce_meta(ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
@@ -8772,38 +8845,28 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
- unsigned ret;
+ unsigned int ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
@@ -8850,26 +8913,26 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_ce_ib_state ce_payload = {0};
- uint64_t csa_addr;
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, ce_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, ce_payload)));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
- offsetof(struct v10_gfx_meta_data,
- ce_payload),
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
sizeof(ce_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
@@ -8880,12 +8943,18 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
struct amdgpu_device *adev = ring->adev;
struct v10_de_ib_state de_payload = {0};
- uint64_t csa_addr, gds_addr;
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
int cnt;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
- gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
+
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -8895,15 +8964,11 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, de_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr +
- offsetof(struct v10_gfx_meta_data, de_payload)));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
- offsetof(struct v10_gfx_meta_data,
- de_payload),
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
sizeof(de_payload) >> 2);
else
amdgpu_ring_write_multiple(ring, (void *)&de_payload,
@@ -8983,19 +9048,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
-static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- WREG32_SOC15(GC, 0, mmSQ_CMD, value);
-}
-
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -9093,7 +9145,7 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
switch (type) {
@@ -9142,6 +9194,7 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
DRM_DEBUG("IH: CP EOP\n");
+
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
@@ -9158,27 +9211,58 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
- * The interrupt can only be enabled/disabled per pipe instead of per queue.
- */
- if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
amdgpu_fence_process(ring);
}
break;
}
+
return 0;
}
static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -9187,17 +9271,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -9221,8 +9363,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
case 0:
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
@@ -9249,6 +9391,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -9264,7 +9415,7 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
uint32_t tmp, target;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
if (ring->me == 1)
target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
@@ -9308,7 +9459,7 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -9343,6 +9494,319 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ u32 tmp;
+ u64 addr;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ addr = amdgpu_bo_gpu_offset(ring->mqd_obj) +
+ offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active);
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (ring->pipe == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue);
+
+ gfx_v10_0_ring_emit_wreg(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
+ gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ 0, 1, 0x20);
+ gfx_v10_0_ring_emit_reg_wait(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kgq_init_queue(ring, true);
+ if (r) {
+ DRM_ERROR("fail to init kgq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ /* make sure dequeue is complete*/
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ return r;
+ }
+
+ r = gfx_v10_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ nv_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9359,6 +9823,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.set_clockgating_state = gfx_v10_0_set_clockgating_state,
.set_powergating_state = gfx_v10_0_set_powergating_state,
.get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
@@ -9366,7 +9832,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
+ .secure_submission_supported = true,
.get_rptr = gfx_v10_0_ring_get_rptr_gfx,
.get_wptr = gfx_v10_0_ring_get_wptr_gfx,
.set_wptr = gfx_v10_0_ring_set_wptr_gfx,
@@ -9375,7 +9841,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -9391,7 +9857,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9401,19 +9868,21 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -9421,7 +9890,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -9434,7 +9902,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9444,12 +9913,16 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -9457,7 +9930,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
.support_64bit_ptrs = true,
- .vmhub = AMDGPU_GFXHUB_0,
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
@@ -9468,7 +9940,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
@@ -9481,13 +9952,14 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
@@ -9506,6 +9978,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
.process = gfx_v10_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = {
+ .set = gfx_v10_0_set_bad_op_fault_state,
+ .process = gfx_v10_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
.set = gfx_v10_0_set_priv_inst_fault_state,
.process = gfx_v10_0_priv_inst_irq,
@@ -9521,27 +9998,33 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
- adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+ adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs;
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
case IP_VERSION(10, 1, 2):
@@ -9555,7 +10038,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
- unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ unsigned int total_cu = adev->gfx.config.max_cu_per_sh *
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines;
@@ -9565,6 +10048,20 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.oa_size = 16;
}
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v10_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v10_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v10_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v10_0_compute_mqd_init;
+}
+
static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
u32 bitmap)
{
@@ -9622,7 +10119,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
{
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
- unsigned disable_masks[4 * 2];
+ unsigned int disable_masks[4 * 2];
if (!adev || !cu_info)
return -EINVAL;
@@ -9633,19 +10130,25 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 7))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
@@ -9661,7 +10164,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
@@ -9737,8 +10240,7 @@ static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev)
(0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT));
}
-const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version gfx_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 10,
.minor = 0,