Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1465 +++++++++++++++++++--------
 1 file changed, 1071 insertions(+), 394 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f90905ef32c7..75ea071744eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,11 +40,11 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -276,6 +276,210 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+};
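These name/offset tables feed the new IP-dump (devcoredump) path. A minimal sketch of the capture loop that consumes gc_reg_list_10_1 follows; the real reader (gfx_v10_0_ip_dump()) is outside this hunk, and the SOC15_REG_ENTRY_OFFSET() resolution shown here is the usual soc15 idiom, not code from this patch:

	/* Sketch only: snapshot every core register into the buffer
	 * allocated by gfx_v10_0_alloc_ip_dump() below.  GFXOFF is
	 * blocked around the reads so they do not touch a gated GC block.
	 */
	uint32_t i, reg_count = ARRAY_SIZE(gc_reg_list_10_1);

	amdgpu_gfx_off_ctrl(adev, false);
	for (i = 0; i < reg_count; i++)
		adev->gfx.ip_dump_core[i] =
			RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
	amdgpu_gfx_off_ctrl(adev, true);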
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+};
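The eight back-to-back mmCP_MEC_ME1_HEADER_DUMP entries are deliberate: each read of a CP HEADER_DUMP offset is assumed to return the next slot of the microcontroller's eight-entry header FIFO, so listing the same offset eight times captures the whole history. The CE/PFP/ME dump runs in the gfx queue list below work the same way. Equivalent by hand (sketch, under that read-indexed-register assumption):

	for (i = 0; i < 8; i++)
		header[i] = RREG32_SOC15(GC, 0, mmCP_MEC_ME1_HEADER_DUMP);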
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+};
+
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
@@ -3498,17 +3702,23 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
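The cleaner shader address is passed in 256-byte units (hence the >> 8) and rides in the two dwords that previously carried the unused GWS mask. For reference, the resulting 8-dword SET_RESOURCES packet laid out as an array (illustrative only, restating the writes above):

	uint32_t pkt[8] = {
		PACKET3(PACKET3_SET_RESOURCES, 6),
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
			PACKET3_SET_RESOURCES_QUEUE_TYPE(0),	/* KIQ */
		lower_32_bits(queue_mask),
		upper_32_bits(queue_mask),
		lower_32_bits(shader_mc_addr),	/* was: gws mask lo */
		upper_32_bits(shader_mc_addr),	/* was: gws mask hi */
		0,				/* oac mask */
		0,				/* gds heap base:0, size:0 */
	};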
@@ -3558,14 +3768,8 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
- if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
- amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
- return;
- }
-
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(action) |
@@ -3614,12 +3818,65 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* enter safe mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait until the dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* wait until the dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
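A hedged sketch of how a hang-recovery path might reach the new hook through the KIQ function table registered just below; the actual call site lives elsewhere in amdgpu and is not part of this hunk:

	/* Illustrative only: reset one hung compute queue via the KIQ. */
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];

	if (kiq->pmf && kiq->pmf->kiq_reset_hw_queue)
		kiq->pmf->kiq_reset_hw_queue(&kiq->ring,
					     AMDGPU_RING_TYPE_COMPUTE,
					     ring->me, ring->pipe, ring->queue,
					     0 /* xcc_id */, vmid);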
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -3823,33 +4080,18 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -3876,12 +4118,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -3964,7 +4204,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
- char fw_name[40];
+ char fw_name[53];
char ucode_prefix[30];
const char *wks = "";
int err;
@@ -3979,20 +4219,23 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
wks = "_wks";
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
@@ -4014,15 +4257,17 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
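Each snprintf()+request pair collapses into one variadic call: amdgpu_ucode_request() now formats the firmware path itself and takes a required/optional policy constant (AMDGPU_UCODE_REQUIRED here). For comparison, the old open-coded form this replaces, reconstructed from the deleted lines:

	/* Old form (removed throughout this function): */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin",
		 ucode_prefix, wks);
	err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);

Note that the mec2 request keeps tolerating failure (the if (!err) guard), since MEC2 firmware is absent on some parts.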
@@ -4080,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4090,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
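The three helpers factor out exactly the boilerplate deleted above. A sketch of two of them, reconstructed from the removed lines (the real definitions are assumed to live in amdgpu_gfx.c; amdgpu_gfx_csb_data_parser() correspondingly walks rlc.cs_data emitting the SET_CONTEXT_REG runs from the deleted loop):

	u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer)
	{
		u32 count = 0;

		buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
		buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
		buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
		buffer[count++] = cpu_to_le32(0x80000000);
		buffer[count++] = cpu_to_le32(0x80000000);

		return count;
	}

	void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count)
	{
		buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
		buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
		buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
		buffer[count++] = cpu_to_le32(0);
	}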
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4490,11 +4709,55 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
-static int gfx_v10_0_sw_init(void *handle)
+static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
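The per-queue buffers are flat arrays of reg_count-sized slices, one per (me, pipe, queue) instance. A sketch of the indexing this allocation implies, assuming me-major iteration order to match the multiplication order used for inst above:

	/* Illustrative: locate register r of compute queue (me, pipe, queue). */
	uint32_t reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
	uint32_t slice = (me * adev->gfx.mec.num_pipe_per_mec + pipe) *
			 adev->gfx.mec.num_queue_per_pipe + queue;
	uint32_t value = adev->gfx.ip_dump_compute_queues[slice * reg_count + r];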
+
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
int xcc_id = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
@@ -4504,7 +4767,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -4518,8 +4781,8 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4533,6 +4796,74 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
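Every branch pairs a cleaner shader image with the minimum CP firmware versions that can run it; the feature is enabled only when ME, PFP and MEC all meet the floor. Since the blocks differ only in those constants, a table-driven equivalent would look like this (illustrative consolidation, not the committed code):

	struct cleaner_fw_floor {
		u32 me, pfp, mec;
	};

	/* e.g. the 10.3.0..10.3.5 floor from the switch above */
	static const struct cleaner_fw_floor floor = { 64, 100, 122 };

	if (adev->gfx.me_fw_version >= floor.me &&
	    adev->gfx.pfp_fw_version >= floor.pfp &&
	    adev->gfx.mec_fw_version >= floor.mec) {
		adev->gfx.enable_cleaner_shader = true;
		r = amdgpu_gfx_cleaner_shader_sw_init(adev,
						adev->gfx.cleaner_shader_size);
		if (r) {
			adev->gfx.enable_cleaner_shader = false;
			dev_err(adev->dev, "Failed to initialize cleaner shader\n");
		}
	}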
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
@@ -4548,6 +4879,13 @@ static int gfx_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -4582,7 +4920,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4614,19 +4952,22 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
}
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!adev->enable_mes_kiq) {
- r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
- if (r) {
- DRM_ERROR("Failed to init KIQ BOs!\n");
- return r;
- }
-
- r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
- if (r)
- return r;
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
}
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+
r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@@ -4642,6 +4983,12 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
+ gfx_v10_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -4666,10 +5013,10 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_ptr);
}
-static int gfx_v10_0_sw_fini(void *handle)
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4678,10 +5025,10 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_gfx_mqd_sw_fini(adev, 0);
- if (!adev->enable_mes_kiq) {
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
- amdgpu_gfx_kiq_fini(adev, 0);
- }
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@@ -4693,6 +5040,11 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
@@ -5018,26 +5370,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
}
+static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
@@ -5679,7 +6079,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- if (adev->job_hang && !enable)
+ if (amdgpu_in_reset(adev) && !enable)
return 0;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -5748,7 +6148,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -5826,7 +6226,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -5903,7 +6303,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6124,7 +6524,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -6162,7 +6562,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
+ /* Set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -6278,7 +6678,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6326,17 +6726,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
@@ -6442,13 +6838,13 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
return 0;
}
-static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6489,22 +6885,9 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -6780,13 +7163,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
return 0;
}
-static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6811,55 +7194,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -6881,10 +7233,7 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
return r;
}
- if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
- r = amdgpu_mes_kiq_hw_init(adev);
- else
- r = gfx_v10_0_kiq_resume(adev);
+ r = gfx_v10_0_kiq_resume(adev);
if (r)
return r;
@@ -7119,25 +7468,26 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
}
-static int gfx_v10_0_hw_init(void *handle)
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/**
* For gfx 10, rlc firmware loading relies on smu firmware is
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (!(adev->flags & AMD_IS_APU)) {
- r = amdgpu_pm_load_smu_firmware(adev, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
gfx_v10_0_disable_gpa_mode(adev);
}
@@ -7173,19 +7523,22 @@ static int gfx_v10_0_hw_init(void *handle)
return r;
}
-static int gfx_v10_0_hw_fini(void *handle)
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
/* WA added for Vangogh asic fixing the SMU suspend failure
* It needs to set power gating again during gfxoff control
* otherwise the gfxoff disallowing will be failed to set.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
- gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE);
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -7210,19 +7563,19 @@ static int gfx_v10_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v10_0_suspend(void *handle)
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_fini(handle);
+ return gfx_v10_0_hw_fini(ip_block);
}
-static int gfx_v10_0_resume(void *handle)
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_init(handle);
+ return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -7231,11 +7584,11 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
}
-static int gfx_v10_0_wait_for_idle(void *handle)
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -7249,11 +7602,11 @@ static int gfx_v10_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v10_0_soft_reset(void *handle)
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -7432,9 +7785,9 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v10_0_early_init(void *handle)
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
@@ -7476,9 +7829,9 @@ static int gfx_v10_0_early_init(void *handle)
return gfx_v10_0_init_microcode(adev);
}
-static int gfx_v10_0_late_init(void *handle)
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -7489,6 +7842,10 @@ static int gfx_v10_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
return 0;
}
@@ -7939,15 +8296,24 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid)
{
- u32 data;
+ u32 reg, pre_data, data;
+ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
/* not for *_SOC15 */
- data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
+ else
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ }
}
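The rewrite turns an unconditional write into a guarded read-modify-write and bypasses the KIQ only in the pp_one_vf SR-IOV case. The pattern in isolation (sketch; the FIELD_* names are placeholders, not real macros):

	/* Guarded RMW: skip the (possibly KIQ-routed) write when the
	 * field already holds the target value.
	 */
	u32 old = RREG32(reg);
	u32 val = (old & ~FIELD_MASK) | FIELD_SET(vmid);

	if (val != old)
		WREG32(reg, val);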
static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
@@ -8060,10 +8426,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -8098,10 +8464,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -8127,9 +8493,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -8196,45 +8562,17 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always being used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
- upper_32_bits(ring->wptr));
- }
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
@@ -8259,42 +8597,13 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG(); /* only DOORBELL method supported on gfx10 now */
- }
+ BUG(); /* only DOORBELL method supported on gfx10 now */
}
}
@@ -8317,7 +8626,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -8353,10 +8662,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -8376,10 +8681,6 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@@ -8437,8 +8738,7 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -8466,10 +8766,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -8620,19 +8917,9 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ce_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- }
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -8658,28 +8945,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -8784,7 +9056,9 @@ static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void
@@ -8931,49 +9205,34 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
int i;
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
- } else {
- me_id = (entry->ring_id & 0x0c) >> 2;
- pipe_id = (entry->ring_id & 0x03) >> 0;
- queue_id = (entry->ring_id & 0x70) >> 4;
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- if (pipe_id == 0)
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
- else
- amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- /* Per-queue interrupt is supported for MEC starting from VI.
- * The interrupt can only be enabled/disabled per pipe instead
- * of per queue.
- */
- if ((ring->me == me_id) &&
- (ring->pipe == pipe_id) &&
- (ring->queue == queue_id))
- amdgpu_fence_process(ring);
- }
- break;
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
}
+ break;
}
return 0;
@@ -8984,12 +9243,39 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+				/* MEC ME IDs start at 1 (ME0 is the gfx ME) */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
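+					/* The CP_ME1_PIPE0_INT_CNTL field layout
+					 * is assumed to be shared by every MEC
+					 * pipe's INT_CNTL register, so it is
+					 * reused for whichever register the
+					 * lookup above returned.
+					 */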
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -8998,17 +9284,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+					    unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+				/* MEC ME IDs start at 1 (ME0 is the gfx ME) */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -9032,8 +9376,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
case 0:
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
@@ -9060,6 +9404,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -9154,6 +9507,310 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+	/* Max HW-optimized batch is 0x3ffe DWs in one packet; the remainder is emitted one NOP at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
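+	/* The PM4 count field encodes payload size minus one, so e.g.
+	 * num_nop == 5 gives a header with count 3 that skips the 4 filler
+	 * DWs emitted below (1 + 4 = 5 DWs total).
+	 */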
+
+	/* Header is at index 0, followed by num_nop - 1 NOP dwords */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ u32 tmp;
+ u64 addr;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
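+	/* Ring space: 5 DWs for the CP_VMID_RESET write, 7 for each of the
+	 * two waits below, plus the KIQ map_queues packet.
+	 */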
+ if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ addr = amdgpu_bo_gpu_offset(ring->mqd_obj) +
+ offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active);
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (ring->pipe == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue);
+
+ gfx_v10_0_ring_emit_wreg(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
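+	/* Poll cp_gfx_hqd_active in the MQD until the queue reports inactive
+	 * (mask 0x1, ref 0), then wait for CP_VMID_RESET to read back zero
+	 * before remapping the queue.
+	 */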
+ gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ 0, 1, 0x20);
+ gfx_v10_0_ring_emit_reg_wait(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kgq_init_queue(ring, true);
+ if (r) {
+ DRM_ERROR("fail to init kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_test_ring(ring);
+}
+
+static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
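+	/* RESET_QUEUES asks the KIQ to forcibly dequeue the hung queue;
+	 * completion is verified by the CP_HQD_ACTIVE poll below.
+	 */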
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+	/* make sure dequeue is complete */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
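+	/* Select the target me/pipe/queue so mmCP_HQD_ACTIVE reflects this
+	 * queue's HQD state.
+	 */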
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ return r;
+ }
+
+ r = gfx_v10_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+ return amdgpu_ring_test_ring(ring);
+}
+
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
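+				/* gc_cp_reg_list_10 only names the ME1 copy of
+				 * HEADER_DUMP, so relabel it for the second
+				 * MEC instance.
+				 */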
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
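+	/* Hold GFXOFF off while reading GC registers; reads while the GFX
+	 * core is power-gated may return invalid data.
+	 */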
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+				/* ME0 is for GFX, so MEC ME IDs start after the gfx MEs */
+ nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
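+				/* As in gfx_v10_ip_print: the list names the
+				 * ME1 HEADER_DUMP, so read the ME2 instance
+				 * for the second MEC.
+				 */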
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ nv_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9170,6 +9827,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.set_clockgating_state = gfx_v10_0_set_clockgating_state,
.set_powergating_state = gfx_v10_0_set_powergating_state,
.get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
@@ -9186,7 +9845,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -9202,7 +9861,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9212,7 +9872,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
@@ -9224,6 +9884,10 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -9243,7 +9907,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9253,12 +9918,17 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -9276,7 +9946,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
@@ -9314,6 +9983,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
.process = gfx_v10_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = {
+ .set = gfx_v10_0_set_bad_op_fault_state,
+ .process = gfx_v10_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
.set = gfx_v10_0_set_priv_inst_fault_state,
.process = gfx_v10_0_priv_inst_irq,
@@ -9335,6 +10009,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}