diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 698 |
1 files changed, 317 insertions, 381 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 8931000dcd41..f38004e6064e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL) +}; + #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -469,8 +516,8 @@ static int sdma_v4_0_irq_id_to_seq(unsigned client_id) static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_VEGA10: + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 0, 0): soc15_program_register_sequence(adev, golden_settings_sdma_4, ARRAY_SIZE(golden_settings_sdma_4)); @@ -478,7 +525,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_vg10, ARRAY_SIZE(golden_settings_sdma_vg10)); break; - case CHIP_VEGA12: + case IP_VERSION(4, 0, 1): soc15_program_register_sequence(adev, golden_settings_sdma_4, ARRAY_SIZE(golden_settings_sdma_4)); @@ -486,7 +533,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_vg12, ARRAY_SIZE(golden_settings_sdma_vg12)); break; - case CHIP_VEGA20: + case IP_VERSION(4, 2, 0): soc15_program_register_sequence(adev, golden_settings_sdma0_4_2_init, ARRAY_SIZE(golden_settings_sdma0_4_2_init)); @@ -497,17 +544,18 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma1_4_2, ARRAY_SIZE(golden_settings_sdma1_4_2)); break; - case CHIP_ARCTURUS: + case IP_VERSION(4, 2, 2): soc15_program_register_sequence(adev, golden_settings_sdma_arct, ARRAY_SIZE(golden_settings_sdma_arct)); break; - case CHIP_ALDEBARAN: + case IP_VERSION(4, 4, 0): soc15_program_register_sequence(adev, golden_settings_sdma_aldebaran, ARRAY_SIZE(golden_settings_sdma_aldebaran)); break; - case CHIP_RAVEN: + case IP_VERSION(4, 1, 0): + case IP_VERSION(4, 1, 1): soc15_program_register_sequence(adev, golden_settings_sdma_4_1, ARRAY_SIZE(golden_settings_sdma_4_1)); @@ -520,7 +568,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_rv1, ARRAY_SIZE(golden_settings_sdma_rv1)); break; - case CHIP_RENOIR: + case IP_VERSION(4, 1, 2): soc15_program_register_sequence(adev, golden_settings_sdma_4_3, ARRAY_SIZE(golden_settings_sdma_4_3)); @@ -538,12 +586,12 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev) * The only chips with SDMAv4 and ULV are VG10 and VG20. * Server SKUs take a different hysteresis setting from other SKUs. */ - switch (adev->asic_type) { - case CHIP_VEGA10: + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 0, 0): if (adev->pdev->device == 0x6860) break; return; - case CHIP_VEGA20: + case IP_VERSION(4, 2, 0): if (adev->pdev->device == 0x66a1) break; return; @@ -560,44 +608,6 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev) } } -static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v1_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - - /* arcturus shares the same FW memory across - all SDMA isntances */ - if (adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) - break; - } - - memset((void *)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -612,100 +622,25 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) // vega10 real chip need to use PSP to load firmware static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) { - const char *chip_name; - char fw_name[30]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - - DRM_DEBUG("\n"); - - switch (adev->asic_type) { - case CHIP_VEGA10: - chip_name = "vega10"; - break; - case CHIP_VEGA12: - chip_name = "vega12"; - break; - case CHIP_VEGA20: - chip_name = "vega20"; - break; - case CHIP_RAVEN: - if (adev->apu_flags & AMD_APU_IS_RAVEN2) - chip_name = "raven2"; - else if (adev->apu_flags & AMD_APU_IS_PICASSO) - chip_name = "picasso"; - else - chip_name = "raven"; - break; - case CHIP_ARCTURUS: - chip_name = "arcturus"; - break; - case CHIP_RENOIR: - if (adev->apu_flags & AMD_APU_IS_RENOIR) - chip_name = "renoir"; - else - chip_name = "green_sardine"; - break; - case CHIP_ALDEBARAN: - chip_name = "aldebaran"; - break; - default: - BUG(); - } - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; + int ret, i; - err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) { - if (adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) { + for (i = 0; i < adev->sdma.num_instances; i++) { + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 0)) { /* Acturus & Aldebaran will leverage the same FW memory for every SDMA instance */ - memcpy((void *)&adev->sdma.instance[i], - (void *)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - } - else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); - - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); - if (err) - goto out; - } - } - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - for (i = 0; i < adev->sdma.num_instances; i++) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + ret = amdgpu_sdma_init_microcode(adev, 0, true); + break; + } else { + ret = amdgpu_sdma_init_microcode(adev, i, false); + if (ret) + return ret; } } -out: - if (err) { - DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - sdma_v4_0_destroy_inst_ctx(adev); - } - return err; + return ret; } /** @@ -720,7 +655,7 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) u64 *rptr; /* XXX check if swapping is necessary on BE */ - rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); + rptr = ((u64 *)ring->rptr_cpu_addr); DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); return ((*rptr) >> 2); @@ -740,7 +675,7 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr)); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI); @@ -766,12 +701,12 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Setting write pointer\n"); if (ring->use_doorbell) { - u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + u64 *wb = (u64 *)ring->wptr_cpu_addr; DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -809,7 +744,7 @@ static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr)); } else { wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI); wptr = wptr << 32; @@ -831,7 +766,7 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring->use_doorbell) { - u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + u64 *wb = (u64 *)ring->wptr_cpu_addr; /* XXX check if swapping is necessary on BE */ WRITE_ONCE(*wb, (ring->wptr << 2)); @@ -978,31 +913,23 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /** - * sdma_v4_0_gfx_stop - stop the gfx async dma engines + * sdma_v4_0_gfx_enable - enable the gfx async dma engines * * @adev: amdgpu_device pointer - * - * Stop the gfx async dma ring buffers (VEGA10). + * @enable: enable SDMA RB/IB + * control the gfx async dma ring buffers (VEGA10). */ -static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i, unset = 0; + int i; for (i = 0; i < adev->sdma.num_instances; i++) { - sdma[i] = &adev->sdma.instance[i].ring; - - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = 1; - } - rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } } @@ -1028,20 +955,10 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - bool unset = false; for (i = 0; i < adev->sdma.num_instances; i++) { - sdma[i] = &adev->sdma.instance[i].page; - - if ((adev->mman.buffer_funcs_ring == sdma[i]) && - (!unset)) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = true; - } - rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -1106,7 +1023,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) * Arcturus for the moment and firmware version 14 * and above. */ - if (adev->asic_type == CHIP_ARCTURUS && + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) && adev->sdma.instance[i].fw_version >= 14) WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable); /* Extend page fault timeout to avoid interrupt storm */ @@ -1129,7 +1047,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) int i; if (!enable) { - sdma_v4_0_gfx_stop(adev); + sdma_v4_0_gfx_enable(adev, enable); sdma_v4_0_rlc_stop(adev); if (adev->sdma.has_page_queue) sdma_v4_0_page_stop(adev); @@ -1172,13 +1090,10 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) { struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; - u32 wb_offset; u32 doorbell; u32 doorbell_offset; u64 wptr_gpu_addr; - wb_offset = (ring->rptr_offs * 4); - rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); @@ -1191,9 +1106,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI, - upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO, - lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); @@ -1223,7 +1138,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0); /* setup the wptr shadow polling */ - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + wptr_gpu_addr = ring->wptr_gpu_addr; WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI, @@ -1245,8 +1160,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) #endif /* enable DMA IBs */ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - - ring->sched.ready = true; } /** @@ -1262,13 +1175,10 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) { struct amdgpu_ring *ring = &adev->sdma.instance[i].page; u32 rb_cntl, ib_cntl, wptr_poll_cntl; - u32 wb_offset; u32 doorbell; u32 doorbell_offset; u64 wptr_gpu_addr; - wb_offset = (ring->rptr_offs * 4); - rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); @@ -1281,9 +1191,9 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI, - upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, - lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); @@ -1314,7 +1224,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0); /* setup the wptr shadow polling */ - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + wptr_gpu_addr = ring->wptr_gpu_addr; WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI, @@ -1336,8 +1246,6 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) #endif /* enable DMA IBs */ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - - ring->sched.ready = true; } static void @@ -1393,9 +1301,10 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev) if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA)) return; - switch (adev->asic_type) { - case CHIP_RAVEN: - case CHIP_RENOIR: + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + case IP_VERSION(4, 1, 1): + case IP_VERSION(4, 1, 2): sdma_v4_1_init_power_gating(adev); sdma_v4_1_update_power_gating(adev, true); break; @@ -1536,13 +1445,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(page); if (r) return r; - - if (adev->mman.buffer_funcs_ring == page) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return r; @@ -1662,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1835,41 +1738,31 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) { uint fw_version = adev->sdma.instance[0].fw_version; - switch (adev->asic_type) { - case CHIP_VEGA10: + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 0, 0): return fw_version >= 430; - case CHIP_VEGA12: + case IP_VERSION(4, 0, 1): /*return fw_version >= 31;*/ return false; - case CHIP_VEGA20: + case IP_VERSION(4, 2, 0): return fw_version >= 123; default: return false; } } -static int sdma_v4_0_early_init(void *handle) +static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; - if (adev->flags & AMD_IS_APU) - adev->sdma.num_instances = 1; - else if (adev->asic_type == CHIP_ARCTURUS) - adev->sdma.num_instances = 8; - else if (adev->asic_type == CHIP_ALDEBARAN) - adev->sdma.num_instances = 5; - else - adev->sdma.num_instances = 2; - r = sdma_v4_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); + if (r) return r; - } /* TODO: Page queue breaks driver reload under SRIOV */ - if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev))) + if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 0, 0)) && + amdgpu_sriov_vf((adev))) adev->sdma.has_page_queue = false; else if (sdma_v4_0_fw_support_paging_queue(adev)) adev->sdma.has_page_queue = true; @@ -1887,32 +1780,25 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); -static int sdma_v4_0_late_init(void *handle) +static int sdma_v4_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_ih_if ih_info = { - .cb = sdma_v4_0_process_ras_data_cb, - }; + struct amdgpu_device *adev = ip_block->adev; sdma_v4_0_setup_ulv(adev); - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.funcs && - adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); - if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) - return adev->sdma.funcs->ras_late_init(adev, &ih_info); - else - return 0; + return 0; } -static int sdma_v4_0_sw_init(void *handle) +static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1970,6 +1856,17 @@ static int sdma_v4_0_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; + /* + * On Arcturus, SDMA instance 5~7 has a different vmhub + * type(AMDGPU_MMHUB1). + */ + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) && + i >= 5) + ring->vm_hub = AMDGPU_MMHUB1(0); + else + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, @@ -1985,8 +1882,27 @@ static int sdma_v4_0_sw_init(void *handle) /* paging queue use same doorbell index/routing as gfx queue * with 0x400 (4096 dwords) offset on second doorbell page */ - ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; - ring->doorbell_index += 0x400; + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= + IP_VERSION(4, 0, 0) && + amdgpu_ip_version(adev, SDMA0_HWIP, 0) < + IP_VERSION(4, 2, 0)) { + ring->doorbell_index = + adev->doorbell_index.sdma_engine[i] << 1; + ring->doorbell_index += 0x400; + } else { + /* From vega20, the sdma_doorbell_range in 1st + * doorbell page is reserved for page queue. + */ + ring->doorbell_index = + (adev->doorbell_index.sdma_engine[i] + 1) << 1; + } + + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) && + i >= 5) + ring->vm_hub = AMDGPU_MMHUB1(0); + else + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1998,83 +1914,110 @@ static int sdma_v4_0_sw_init(void *handle) } } + if (amdgpu_sdma_ras_sw_init(adev)) { + dev_err(adev->dev, "Failed to initialize sdma ras block!\n"); + return -EINVAL; + } + + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } -static int sdma_v4_0_sw_fini(void *handle) +static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; - if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) - adev->sdma.funcs->ras_fini(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); if (adev->sdma.has_page_queue) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - sdma_v4_0_destroy_inst_ctx(adev); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 2, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 0)) + amdgpu_sdma_destroy_inst_ctx(adev, true); + else + amdgpu_sdma_destroy_inst_ctx(adev, false); + + kfree(adev->sdma.ip_dump); return 0; } -static int sdma_v4_0_hw_init(void *handle) +static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block) { - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0); if (!amdgpu_sriov_vf(adev)) sdma_v4_0_init_golden_registers(adev); - r = sdma_v4_0_start(adev); - - return r; + return sdma_v4_0_start(adev); } -static int sdma_v4_0_hw_fini(void *handle) +static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; if (amdgpu_sriov_vf(adev)) return 0; - for (i = 0; i < adev->sdma.num_instances; i++) { - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, - AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } } sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0); return 0; } -static int sdma_v4_0_suspend(void *handle) +static int sdma_v4_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + /* SMU saves SDMA state for us */ + if (adev->in_s0ix) { + sdma_v4_0_gfx_enable(adev, false); + return 0; + } - return sdma_v4_0_hw_fini(adev); + return sdma_v4_0_hw_fini(ip_block); } -static int sdma_v4_0_resume(void *handle) +static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; - return sdma_v4_0_hw_init(adev); + /* SMU restores SDMA state for us */ + if (adev->in_s0ix) { + sdma_v4_0_enable(adev, true); + sdma_v4_0_gfx_enable(adev, true); + return 0; + } + + return sdma_v4_0_hw_init(ip_block); } -static bool sdma_v4_0_is_idle(void *handle) +static bool sdma_v4_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -2087,11 +2030,11 @@ static bool sdma_v4_0_is_idle(void *handle) return true; } -static int sdma_v4_0_wait_for_idle(void *handle) +static int sdma_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i, j; u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { for (j = 0; j < adev->sdma.num_instances; j++) { @@ -2106,7 +2049,7 @@ static int sdma_v4_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int sdma_v4_0_soft_reset(void *handle) +static int sdma_v4_0_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ @@ -2132,23 +2075,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t instance; + int instance; DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); break; case 1: - if (adev->asic_type == CHIP_VEGA20) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 0)) amdgpu_fence_process(&adev->sdma.instance[instance].page); break; case 2: /* XXX compute */ break; case 3: - if (adev->asic_type != CHIP_VEGA20) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) != + IP_VERSION(4, 2, 0)) amdgpu_fence_process(&adev->sdma.instance[instance].page); break; } @@ -2217,7 +2165,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_0_irq_id_to_seq(entry->client_id); @@ -2229,15 +2177,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, + " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task.comm, task_info->task.pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } @@ -2344,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep( } } -static int sdma_v4_0_set_clockgating_state(void *handle, +static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -2359,14 +2312,15 @@ static int sdma_v4_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_0_set_powergating_state(void *handle, +static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; - switch (adev->asic_type) { - case CHIP_RAVEN: - case CHIP_RENOIR: + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + case IP_VERSION(4, 1, 1): + case IP_VERSION(4, 1, 2): sdma_v4_1_update_power_gating(adev, state == AMD_PG_STATE_GATE); break; @@ -2377,9 +2331,9 @@ static int sdma_v4_0_set_powergating_state(void *handle, return 0; } -static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v4_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -2396,6 +2350,46 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + + if (!adev->sdma.ip_dump) + return; + + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_0_get_reg_offset(adev, i, + sdma_reg_list_4_0[j].reg_offset)); + } +} + const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, @@ -2412,50 +2406,16 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_clockgating_state = sdma_v4_0_set_clockgating_state, .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, + .dump_ip_state = sdma_v4_0_dump_ip_state, + .print_ip_state = sdma_v4_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = sdma_v4_0_ring_get_rptr, - .get_wptr = sdma_v4_0_ring_get_wptr, - .set_wptr = sdma_v4_0_ring_set_wptr, - .emit_frame_size = - 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* hdp invalidate */ - 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - /* sdma_v4_0_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + - 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ - .emit_ib = sdma_v4_0_ring_emit_ib, - .emit_fence = sdma_v4_0_ring_emit_fence, - .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, - .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, - .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .test_ring = sdma_v4_0_ring_test_ring, - .test_ib = sdma_v4_0_ring_test_ib, - .insert_nop = sdma_v4_0_ring_insert_nop, - .pad_ib = sdma_v4_0_ring_pad_ib, - .emit_wreg = sdma_v4_0_ring_emit_wreg, - .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -/* - * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1). - * So create a individual constant ring_funcs for those instances. - */ -static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { - .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, - .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), - .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_1, + .secure_submission_supported = true, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -2484,42 +2444,10 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = sdma_v4_0_ring_get_rptr, - .get_wptr = sdma_v4_0_page_ring_get_wptr, - .set_wptr = sdma_v4_0_page_ring_set_wptr, - .emit_frame_size = - 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* hdp invalidate */ - 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - /* sdma_v4_0_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + - 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ - .emit_ib = sdma_v4_0_ring_emit_ib, - .emit_fence = sdma_v4_0_ring_emit_fence, - .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, - .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, - .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .test_ring = sdma_v4_0_ring_test_ring, - .test_ib = sdma_v4_0_ring_test_ib, - .insert_nop = sdma_v4_0_ring_insert_nop, - .pad_ib = sdma_v4_0_ring_pad_ib, - .emit_wreg = sdma_v4_0_ring_emit_wreg, - .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { - .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, - .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), - .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_1, + .secure_submission_supported = true, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -2551,19 +2479,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->asic_type == CHIP_ARCTURUS && i >= 5) - adev->sdma.instance[i].ring.funcs = - &sdma_v4_0_ring_funcs_2nd_mmhub; - else - adev->sdma.instance[i].ring.funcs = - &sdma_v4_0_ring_funcs; + adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - if (adev->asic_type == CHIP_ARCTURUS && i >= 5) - adev->sdma.instance[i].page.funcs = - &sdma_v4_0_page_ring_funcs_2nd_mmhub; - else - adev->sdma.instance[i].page.funcs = + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } @@ -2632,7 +2551,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (VEGA10/12). * Used by the amdgpu ttm implementation to move pages if @@ -2642,11 +2561,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); @@ -2744,7 +2663,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value, } } -static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, +static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev, uint32_t instance, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -2766,6 +2685,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, return 0; }; +static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + int i = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status)) { + dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i); + return; + } + } +} + static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) { int i; @@ -2777,22 +2708,27 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) } } -static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { - .ras_late_init = amdgpu_sdma_ras_late_init, - .ras_fini = amdgpu_sdma_ras_fini, +const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = { .query_ras_error_count = sdma_v4_0_query_ras_error_count, .reset_ras_error_count = sdma_v4_0_reset_ras_error_count, }; +static struct amdgpu_sdma_ras sdma_v4_0_ras = { + .ras_block = { + .hw_ops = &sdma_v4_0_ras_hw_ops, + .ras_cb = sdma_v4_0_process_ras_data_cb, + }, +}; + static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - adev->sdma.funcs = &sdma_v4_0_ras_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 2, 0): + case IP_VERSION(4, 2, 2): + adev->sdma.ras = &sdma_v4_0_ras; break; - case CHIP_ALDEBARAN: - adev->sdma.funcs = &sdma_v4_4_ras_funcs; + case IP_VERSION(4, 4, 0): + adev->sdma.ras = &sdma_v4_4_ras; break; default: break; |
