summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c407
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c154
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c123
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c384
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c325
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c182
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c210
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c470
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c171
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c243
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c305
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c350
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c372
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h142
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c261
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c330
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c237
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c399
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h182
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c658
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c278
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c312
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c114
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c194
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c4165
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c850
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c616
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c1074
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c915
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c491
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c87
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c1541
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c10
184 files changed, 17286 insertions, 3251 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 12adca8c7819..b91e79c721e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -69,6 +69,16 @@ config DRM_AMDGPU_USERPTR
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.
+config DRM_AMDGPU_WERROR
+ bool "Force the compiler to throw an error instead of a warning when compiling"
+ depends on DRM_AMDGPU
+ depends on EXPERT
+ depends on !COMPILE_TEST
+ default n
+ help
+ Add -Werror to the build flags for amdgpu.ko.
+ Only enable this if you are warning code for amdgpu.ko.
+
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 415a7fa395c4..8d16f280b695 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -39,6 +39,26 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
+subdir-ccflags-y := -Wextra
+subdir-ccflags-y += -Wunused
+subdir-ccflags-y += -Wmissing-prototypes
+subdir-ccflags-y += -Wmissing-declarations
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-format-attribute
+# Need this to avoid recursive variable evaluation issues
+cond-flags := $(call cc-option, -Wunused-but-set-variable) \
+ $(call cc-option, -Wunused-const-variable) \
+ $(call cc-option, -Wstringop-truncation) \
+ $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(cond-flags)
+subdir-ccflags-y += -Wno-unused-parameter
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-sign-compare
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-override-init
+subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
+
amdgpu-y := amdgpu_drv.o
# add KMS driver
@@ -60,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
- amdgpu_ring_mux.o
+ amdgpu_ring_mux.o amdgpu_xcp.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -78,7 +98,7 @@ amdgpu-y += \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
- nbio_v7_9.o
+ nbio_v7_9.o aqua_vanjaram_reg_init.o
# add DF block
amdgpu-y += \
@@ -183,12 +203,14 @@ amdgpu-y += \
vcn_v2_5.o \
vcn_v3_0.o \
vcn_v4_0.o \
+ vcn_v4_0_3.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
jpeg_v3_0.o \
- jpeg_v4_0.o
+ jpeg_v4_0.o \
+ jpeg_v4_0_3.o
# add ATHUB block
amdgpu-y += \
@@ -203,6 +225,7 @@ amdgpu-y += \
smuio_v11_0.o \
smuio_v11_0_6.o \
smuio_v13_0.o \
+ smuio_v13_0_3.o \
smuio_v13_0_6.o
# add reset block
@@ -228,6 +251,7 @@ amdgpu-y += \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_aldebaran.o \
+ amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 02b827785e39..a84bd4a0c421 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,8 +107,9 @@
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
#include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"
-#define MAX_GPU_INSTANCE 16
+#define MAX_GPU_INSTANCE 64
struct amdgpu_gpu_instance
{
@@ -212,6 +213,8 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
+extern int amdgpu_mtype_local;
+extern bool enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
@@ -242,9 +245,10 @@ extern int amdgpu_num_kcq;
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;
+extern int amdgpu_user_partt_mode;
+
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
-#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
@@ -282,6 +286,7 @@ extern int amdgpu_sg_display;
#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
+struct amdgpu_xcp_mgr;
struct amdgpu_device;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
@@ -463,6 +468,8 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
+ /** GPU partition selection */
+ uint32_t xcp_id;
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
@@ -573,6 +580,8 @@ struct amdgpu_asic_funcs {
/* query video codecs */
int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs);
+ /* encode "> 32bits" smn addressing */
+ u64 (*encode_ext_smn_addressing)(int ext_id);
};
/*
@@ -607,6 +616,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
+
typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
@@ -657,7 +669,7 @@ enum amd_hw_ip_block_type {
MAX_HWIP
};
-#define HWIP_MAX_INSTANCE 28
+#define HWIP_MAX_INSTANCE 44
#define HW_ID_MAX 300
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
@@ -665,6 +677,17 @@ enum amd_hw_ip_block_type {
#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
#define IP_VERSION_REV(ver) ((ver) & 0xFF)
+struct amdgpu_ip_map_info {
+ /* Map of logical to actual dev instances/mask */
+ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
+ int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst);
+ uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask);
+};
+
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
@@ -750,6 +773,7 @@ struct amdgpu_device {
struct amdgpu_acp acp;
#endif
struct amdgpu_hive_info *hive;
+ struct amdgpu_xcp_mgr *xcp_mgr;
/* ASIC */
enum amd_asic_type asic_type;
uint32_t family;
@@ -797,6 +821,8 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg_ext_t pcie_rreg_ext;
+ amdgpu_wreg_ext_t pcie_wreg_ext;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
/* protects concurrent UVD register access */
@@ -830,7 +856,7 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
- unsigned num_vmhubs;
+ DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
/* memory management */
struct amdgpu_mman mman;
@@ -962,6 +988,7 @@ struct amdgpu_device {
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+ struct amdgpu_ip_map_info ip_map;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
@@ -1020,6 +1047,9 @@ struct amdgpu_device {
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;
+ /* Track auto wait count on s_barrier settings */
+ bool barrier_has_auto_waitcnt;
+
struct amdgpu_reset_control *reset_cntl;
uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
@@ -1050,6 +1080,8 @@ struct amdgpu_device {
bool job_hang;
bool dc_enabled;
+ /* Mask of active clusters */
+ uint32_t aid_mask;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1081,11 +1113,18 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
void *buf, size_t size, bool write);
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
uint32_t reg, uint32_t v);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
@@ -1137,6 +1176,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
+#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
@@ -1204,7 +1245,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* ASICs macro.
*/
-#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
+#define amdgpu_asic_set_vga_state(adev, state) \
+ ((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
@@ -1235,6 +1277,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
+#define for_each_inst(i, inst_mask) \
+ for (i = ffs(inst_mask) - 1; inst_mask; \
+ inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1)
+
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
/* Common functions */
@@ -1348,6 +1394,12 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
/* amdgpu_acpi.c */
+struct amdgpu_numa_info {
+ uint64_t size;
+ int pxm;
+ int nid;
+};
+
/* ATCS Device/Driver State */
#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3
@@ -1365,15 +1417,32 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state);
int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size);
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info);
void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
void amdgpu_acpi_detect(void);
+void amdgpu_acpi_release(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
+ u64 *tmr_offset, u64 *tmr_size)
+{
+ return -EINVAL;
+}
+static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
+ int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ return -EINVAL;
+}
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline void amdgpu_acpi_detect(void) { }
+static inline void amdgpu_acpi_release(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state) { return 0; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index fd6e83795873..385c6acb5728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -26,6 +26,7 @@
#include <linux/acpi.h>
#include <linux/backlight.h>
#include <linux/slab.h>
+#include <linux/xarray.h>
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
@@ -38,6 +39,45 @@
#include "amd_acpi.h"
#include "atom.h"
+/* Declare GUID for AMD _DSM method for XCCs */
+static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
+ 0xb8, 0xb4, 0x45, 0x56, 0x2e,
+ 0x8c, 0x5b, 0xec);
+
+#define AMD_XCC_HID_START 3000
+#define AMD_XCC_DSM_GET_NUM_FUNCS 0
+#define AMD_XCC_DSM_GET_SUPP_MODE 1
+#define AMD_XCC_DSM_GET_XCP_MODE 2
+#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
+#define AMD_XCC_DSM_GET_TMR_INFO 5
+#define AMD_XCC_DSM_NUM_FUNCS 5
+
+#define AMD_XCC_MAX_HID 24
+
+struct xarray numa_info_xa;
+
+/* Encapsulates the XCD acpi object information */
+struct amdgpu_acpi_xcc_info {
+ struct list_head list;
+ struct amdgpu_numa_info *numa_info;
+ uint8_t xcp_node;
+ uint8_t phy_id;
+ acpi_handle handle;
+};
+
+struct amdgpu_acpi_dev_info {
+ struct list_head list;
+ struct list_head xcc_list;
+ uint16_t bdf;
+ uint16_t supp_xcp_mode;
+ uint16_t xcp_mode;
+ uint16_t mem_mode;
+ uint64_t tmr_base;
+ uint64_t tmr_size;
+};
+
+struct list_head amdgpu_acpi_dev_list;
+
struct amdgpu_atif_notification_cfg {
bool enabled;
int command_code;
@@ -801,6 +841,343 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
return r;
}
+#ifdef CONFIG_ACPI_NUMA
+static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
+{
+ /* This is directly using si_meminfo_node implementation as the
+ * function is not exported.
+ */
+ int zone_type;
+ uint64_t managed_pages = 0;
+
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages +=
+ zone_managed_pages(&pgdat->node_zones[zone_type]);
+ return managed_pages * PAGE_SIZE;
+}
+
+static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
+{
+ struct amdgpu_numa_info *numa_info;
+ int nid;
+
+ numa_info = xa_load(&numa_info_xa, pxm);
+
+ if (!numa_info) {
+ struct sysinfo info;
+
+ numa_info = kzalloc(sizeof *numa_info, GFP_KERNEL);
+ if (!numa_info)
+ return NULL;
+
+ nid = pxm_to_node(pxm);
+ numa_info->pxm = pxm;
+ numa_info->nid = nid;
+
+ if (numa_info->nid == NUMA_NO_NODE) {
+ si_meminfo(&info);
+ numa_info->size = info.totalram * info.mem_unit;
+ } else {
+ numa_info->size = amdgpu_acpi_get_numa_size(nid);
+ }
+ xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
+ }
+
+ return numa_info;
+}
+#endif
+
+/**
+ * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
+ * acpi device handle
+ *
+ * @handle: acpi handle
+ * @numa_info: amdgpu_numa_info structure holding numa information
+ *
+ * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
+ * given amdgpu acpi device.
+ *
+ * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
+ */
+static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
+ struct amdgpu_numa_info **numa_info)
+{
+#ifdef CONFIG_ACPI_NUMA
+ u64 pxm;
+ acpi_status status;
+
+ if (!numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+
+ if (ACPI_FAILURE(status))
+ return status;
+
+ *numa_info = amdgpu_acpi_get_numa_info(pxm);
+
+ if (!*numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ return_ACPI_STATUS(AE_OK);
+#else
+ return_ACPI_STATUS(AE_NOT_EXIST);
+#endif
+}
+
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
+{
+ struct amdgpu_acpi_dev_info *acpi_dev;
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return NULL;
+
+ list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
+ if (acpi_dev->bdf == bdf)
+ return acpi_dev;
+
+ return NULL;
+}
+
+static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
+ struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
+{
+ struct amdgpu_acpi_dev_info *tmp;
+ union acpi_object *obj;
+ int ret = -ENOENT;
+
+ *dev_info = NULL;
+ tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmp->xcc_list);
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->bdf = bdf;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_SUPP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_SUPP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
+ ACPI_FREE(obj);
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_XCP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_XCP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->xcp_mode = obj->integer.value & 0xFFFF;
+ tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_TMR_INFO, NULL,
+ ACPI_TYPE_PACKAGE);
+
+ if (!obj || obj->package.count < 2) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_TMR_INFO);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->tmr_base = obj->package.elements[0].integer.value;
+ tmp->tmr_size = obj->package.elements[1].integer.value;
+ ACPI_FREE(obj);
+
+ DRM_DEBUG_DRIVER(
+ "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
+ tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->tmr_base, tmp->tmr_size);
+ list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
+ *dev_info = tmp;
+
+ return 0;
+
+out:
+ if (obj)
+ ACPI_FREE(obj);
+ kfree(tmp);
+
+ return ret;
+}
+
+static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
+ u16 *bdf)
+{
+ union acpi_object *obj;
+ acpi_status status;
+ int ret = -ENOENT;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
+ goto out;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* PF xcc id [39:32] */
+ xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
+ /* xcp node of this xcc [47:40] */
+ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF bus/dev/fn of this xcc [63:48] */
+ *bdf = (obj->integer.value >> 48) & 0xFFFF;
+ ACPI_FREE(obj);
+ obj = NULL;
+
+ status =
+ amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
+
+ /* TODO: check if this check is required */
+ if (ACPI_SUCCESS(status))
+ ret = 0;
+out:
+ if (obj)
+ ACPI_FREE(obj);
+
+ return ret;
+}
+
+static int amdgpu_acpi_enumerate_xcc(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info = NULL;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ struct acpi_device *acpi_dev;
+ char hid[ACPI_ID_LEN];
+ int ret, id;
+ u16 bdf;
+
+ INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
+ xa_init(&numa_info_xa);
+
+ for (id = 0; id < AMD_XCC_MAX_HID; id++) {
+ sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
+ acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
+ /* These ACPI objects are expected to be in sequential order. If
+ * one is not found, no need to check the rest.
+ */
+ if (!acpi_dev) {
+ DRM_DEBUG_DRIVER("No matching acpi device found for %s",
+ hid);
+ break;
+ }
+
+ xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
+ GFP_KERNEL);
+ if (!xcc_info) {
+ DRM_ERROR("Failed to allocate memory for xcc info\n");
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&xcc_info->list);
+ xcc_info->handle = acpi_device_handle(acpi_dev);
+ acpi_dev_put(acpi_dev);
+
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
+ if (ret) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ dev_info = amdgpu_acpi_get_dev(bdf);
+
+ if (!dev_info)
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf);
+
+ if (ret == -ENOMEM)
+ return ret;
+
+ if (!dev_info) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ list_add_tail(&xcc_info->list, &dev_info->xcc_list);
+ }
+
+ return 0;
+}
+
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ u16 bdf;
+
+ if (!tmr_offset || !tmr_size)
+ return -EINVAL;
+
+ bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
+ dev_info = amdgpu_acpi_get_dev(bdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ *tmr_offset = dev_info->tmr_base;
+ *tmr_size = dev_info->tmr_size;
+
+ return 0;
+}
+
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ u16 bdf;
+
+ if (!numa_info)
+ return -EINVAL;
+
+ bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
+ dev_info = amdgpu_acpi_get_dev(bdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
+ if (xcc_info->phy_id == xcc_id) {
+ memcpy(numa_info, xcc_info->numa_info,
+ sizeof(*numa_info));
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
/**
* amdgpu_acpi_event - handle notify events
*
@@ -1054,6 +1431,36 @@ void amdgpu_acpi_detect(void)
} else {
atif->backlight_caps.caps_valid = false;
}
+
+ amdgpu_acpi_enumerate_xcc();
+}
+
+void amdgpu_acpi_release(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
+ struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
+ struct amdgpu_numa_info *numa_info;
+ unsigned long index;
+
+ xa_for_each(&numa_info_xa, index, numa_info) {
+ kfree(numa_info);
+ xa_erase(&numa_info_xa, index);
+ }
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return;
+
+ list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
+ list) {
+ list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
+ list) {
+ list_del(&xcc_info->list);
+ kfree(xcc_info);
+ }
+
+ list_del(&dev_info->list);
+ kfree(dev_info);
+ }
}
#if IS_ENABLED(CONFIG_SUSPEND)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0385f7f69278..b4fcad0e62f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -53,7 +53,6 @@ int amdgpu_amdkfd_init(void)
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
ret = kgd2kfd_init();
- amdgpu_amdkfd_gpuvm_init_mem_limits();
kfd_initialized = !ret;
return ret;
@@ -143,6 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int i;
int last_valid_bit;
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
+
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap =
@@ -162,7 +163,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* clear
*/
bitmap_complement(gpu_resources.cp_queue_bitmap,
- adev->gfx.mec.queue_bitmap,
+ adev->gfx.mec_bitmap[0].queue_bitmap,
KGD_MAX_QUEUES);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
@@ -427,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
}
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
- struct kfd_local_mem_info *mem_info)
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp)
{
memset(mem_info, 0, sizeof(*mem_info));
- mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
- mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+ if (xcp) {
+ if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
+ mem_info->local_mem_size_public =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ else
+ mem_info->local_mem_size_private =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ } else {
+ mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size -
adev->gmc.visible_vram_size;
-
+ }
mem_info->vram_width = adev->gmc.vram_width;
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
@@ -497,7 +507,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags)
+ uint32_t *flags, int8_t *xcp_id)
{
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
@@ -541,6 +551,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
}
+ if (xcp_id)
+ *xcp_id = bo->xcp_id;
out_put:
dma_buf_put(dma_buf);
@@ -732,17 +744,19 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
if (adev->family == AMDGPU_FAMILY_AI) {
int i;
- for (i = 0; i < adev->num_vmhubs; i++)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
} else {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
}
return 0;
}
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
+ uint16_t pasid,
+ enum TLB_FLUSH_TYPE flush_type,
+ uint32_t inst)
{
bool all_hub = false;
@@ -750,7 +764,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
adev->family == AMDGPU_FAMILY_RV)
all_hub = true;
- return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+ return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
}
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
@@ -758,11 +772,32 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
return adev->have_atomics_support;
}
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+{
+ amdgpu_device_flush_hdp(adev, NULL);
+}
+
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
{
amdgpu_umc_poison_handler(adev, reset);
}
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload)
+{
+ int ret;
+
+ /* Device or IH ring is not ready so bail. */
+ ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
+ if (ret)
+ return ret;
+
+ /* Send payload to fence KFD interrupts */
+ amdgpu_amdkfd_interrupt(adev, payload);
+
+ return 0;
+}
+
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
{
if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
@@ -770,3 +805,28 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
else
return false;
}
+
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+ return kgd2kfd_check_and_lock_kfd();
+}
+
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+ kgd2kfd_unlock_kfd();
+}
+
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
+{
+ u64 tmp;
+ s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
+
+ if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
+ tmp = adev->gmc.mem_partitions[mem_id].size;
+ do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+ return ALIGN_DOWN(tmp, PAGE_SIZE);
+ } else {
+ return adev->gmc.real_vram_size;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 01ba3589b60a..2d0406bff84e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -30,10 +30,12 @@
#include <linux/kthread.h>
#include <linux/workqueue.h>
#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
+#include "amdgpu_xcp.h"
extern uint64_t amdgpu_amdkfd_total_mem_size;
@@ -97,10 +99,13 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
- int64_t vram_used;
- uint64_t vram_used_aligned;
+ int64_t vram_used[MAX_XCP];
+ uint64_t vram_used_aligned[MAX_XCP];
bool init_complete;
struct work_struct reset_work;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+ struct dev_pagemap pgmap;
};
enum kgd_engine_type {
@@ -151,6 +156,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
@@ -160,7 +167,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
uint16_t vmid);
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
+ uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
+ uint32_t inst);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -224,7 +232,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
- struct kfd_local_mem_info *mem_info);
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp);
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
@@ -234,13 +243,15 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags);
+ uint32_t *flags, int8_t *xcp_id);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
struct amdgpu_device *src);
int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
struct amdgpu_device *src,
bool is_min);
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload);
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
@@ -279,7 +290,8 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
@@ -310,6 +322,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
uint64_t *mmap_offset);
int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
struct dma_buf **dmabuf);
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
@@ -319,9 +332,18 @@ void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 alloc_flag);
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 alloc_flag);
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+
+#define KFD_XCP_MEM_ID(adev, xcp_id) \
+ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
+ (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
+
+#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
+
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
@@ -352,6 +374,17 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
{
}
#endif
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
+#else
+static inline
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
+{
+ return 0;
+}
+#endif
+
/* KGD2KFD callbacks */
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
@@ -372,6 +405,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(void);
+void kgd2kfd_unlock_kfd(void);
#else
static inline int kgd2kfd_init(void)
{
@@ -437,5 +472,14 @@ static inline
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
+
+static inline int kgd2kfd_check_and_lock_kfd(void)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_unlock_kfd(void)
+{
+}
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 4485bb29bec9..60f9e027fb66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -23,6 +23,149 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "amdgpu_amdkfd_gfx_v9.h"
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. */
+static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_aldebaran_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
@@ -42,5 +185,14 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
- .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+ .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
+ .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 4191af5a3f13..625db444df1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_reset.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
@@ -48,6 +49,8 @@
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -276,6 +279,117 @@ int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
return 0;
}
+/*
+ * Helper used to suspend/resume gfx pipe for image post process work to set
+ * barrier behaviour.
+ */
+static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ if (!(ring && ring->sched.thread))
+ continue;
+
+ /* stop secheduler and drain ring. */
+ if (suspend) {
+ drm_sched_stop(&ring->sched, NULL);
+ r = amdgpu_fence_wait_empty(ring);
+ if (r)
+ goto out;
+ } else {
+ drm_sched_start(&ring->sched, false);
+ }
+ }
+
+out:
+ /* return on resume or failure to drain rings. */
+ if (!suspend || r)
+ return r;
+
+ return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
+}
+
+static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
+{
+ uint32_t data;
+
+ WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ amdgpu_amdkfd_suspend(adev, false);
+
+ if (suspend_resume_compute_scheduler(adev, true))
+ goto out;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
+ data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !enable_waitcnt);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
+
+out:
+ suspend_resume_compute_scheduler(adev, false);
+
+ amdgpu_amdkfd_resume(adev, false);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, false);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -294,6 +408,15 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_arcturus_enable_debug_trap,
+ .disable_debug_trap = kgd_arcturus_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
new file mode 100644
index 000000000000..5b4b7f8b92a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "athub/athub_1_8_0_offset.h"
+#include "athub/athub_1_8_0_sh_mask.h"
+#include "oss/osssys_4_4_2_offset.h"
+#include "oss/osssys_4_4_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base =
+ SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
+ regSDMA_RLC0_RB_CNTL) -
+ regSDMA_RLC0_RB_CNTL;
+ uint32_t retval = sdma_engine_reg_base +
+ queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+ return retval;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
+ reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_MIDCMD_DATA0;
+ reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+ temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
+ SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
+ u32 pasid, unsigned int vmid, uint32_t xcc_inst)
+{
+ unsigned long timeout;
+ unsigned int reg;
+ unsigned int phy_inst = GET_INST(GC, xcc_inst);
+ /* Every two XCCs share one AID */
+ unsigned int aid = phy_inst / 2;
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
+
+ timeout = jiffies + msecs_to_jiffies(10);
+ while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid))) {
+ if (time_after(jiffies, timeout)) {
+ pr_err("Fail to program VMID-PASID mapping\n");
+ return -ETIME;
+ }
+ cpu_relax();
+ }
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
+ /* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs,
+ * and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1"
+ * programs _LUT for XCC and "aid * 4" for AID where the XCC connects
+ * to.
+ */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4 + (phy_inst % 2) + 1);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);
+
+ return 0;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, hqd_end, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
+ hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
+
+ for (reg = hqd_base; reg <= hqd_end; reg++)
+ WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL),
+ data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uintptr_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
+ queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
+
+const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_4_3_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .program_trap_handler_settings =
+ kgd_gfx_v9_program_trap_handler_settings
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 9378fc79e9ea..8ad7a7779e14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -21,6 +21,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
@@ -31,6 +32,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -79,7 +81,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -91,7 +93,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
- unsigned int vmid)
+ unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -135,7 +137,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
* but still works
*/
-static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -205,7 +208,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
- uint32_t wptr_mask, struct mm_struct *mm)
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
@@ -286,9 +289,9 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
@@ -303,7 +306,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -330,7 +333,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
release_queue(adev);
return r;
@@ -338,7 +341,7 @@ out_unlock:
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -469,7 +472,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -510,7 +513,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -673,7 +676,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
@@ -708,8 +711,295 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
+/*
+ * GFX10 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~3500 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because current GFX10 chips cannot support multi-process debugging due to
+ * trap configuration and masking being limited to global scope. Always
+ * assume single process conditions.
+ *
+ */
+
+#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
+static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
+{
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ int i;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+ if (restore_dbg_registers) {
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, 1 << vmid);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ /*
+ * The CP cannont handle a 0 grace period input and will result in
+ * an infinite grace period being set so set to 1 to prevent this.
+ */
+ if (grace_period == 0)
+ grace_period = 1;
+
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ grace_period);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
static void program_trap_handler_settings(struct amdgpu_device *adev,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -750,5 +1040,14 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
new file mode 100644
index 000000000000..e6b70196071a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid);
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index ba21ec6b35e0..8c8437a4383f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -22,6 +22,7 @@
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -80,7 +81,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -93,7 +94,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
/* ATC is defeatured on Sienna_Cichlid */
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
- unsigned int vmid)
+ unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
@@ -105,7 +106,8 @@ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int
return 0;
}
-static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id)
+static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -177,7 +179,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
- uint32_t wptr_mask, struct mm_struct *mm)
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
@@ -273,9 +275,9 @@ static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
@@ -290,7 +292,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -317,7 +319,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
release_queue(adev);
return r;
@@ -325,7 +327,7 @@ out_unlock:
static int hqd_dump_v10_3(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -456,7 +458,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -498,7 +500,7 @@ static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -586,7 +588,7 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
static int wave_control_execute_v10_3(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
@@ -628,7 +630,8 @@ static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
}
static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -652,142 +655,6 @@ static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
unlock_srbm(adev);
}
-#if 0
-uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev,
- uint32_t trap_debug_wave_launch_mode,
- uint32_t vmid)
-{
- uint32_t data = 0;
- uint32_t orig_wave_cntl_value;
- uint32_t orig_stall_vmid;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
- 0,
- mmSPI_GDBG_WAVE_CNTL));
- orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
- SPI_GDBG_WAVE_CNTL,
- STALL_VMID);
-
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- data = 0;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev)
-{
- mutex_lock(&adev->grbm_idx_mutex);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev,
- uint32_t trap_override,
- uint32_t trap_mask)
-{
- uint32_t data = 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- data = 0;
- data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
- EXCP_EN, trap_mask);
- data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
- REPLACE, trap_override);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
- uint8_t wave_launch_mode,
- uint32_t vmid)
-{
- uint32_t data = 0;
- bool is_stall_mode;
- bool is_mode_set;
-
- is_stall_mode = (wave_launch_mode == 4);
- is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
- VMID_MASK, is_mode_set ? 1 << vmid : 0);
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
- MODE, is_mode_set ? wave_launch_mode : 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
-
- data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
- STALL_VMID, is_stall_mode ? 1 << vmid : 0);
- data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
- STALL_RA, is_stall_mode ? 1 : 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
-
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-
-/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
- * The values read are:
- * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
- * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
- * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
- * gws_wait_time -- Wait Count for Global Wave Syncs.
- * que_sleep_wait_time -- Wait Count for Dequeue Retry.
- * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
- * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
- * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
- */
-void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
- uint32_t *wait_times)
-
-{
- *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
-}
-
-void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
- uint32_t wait_times,
- uint32_t grace_period,
- uint32_t *reg_offset,
- uint32_t *reg_data)
-{
- *reg_data = wait_times;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
-
- *reg_offset = mmCP_IQ_WAIT_TIME2;
-}
-#endif
-
const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v10_3,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
@@ -805,12 +672,13 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
-#if 0
- .enable_debug_trap = enable_debug_trap_v10_3,
- .disable_debug_trap = disable_debug_trap_v10_3,
- .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
- .set_wave_launch_mode = set_wave_launch_mode_v10_3,
- .get_iq_wait_times = get_iq_wait_times_v10_3,
- .build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
-#endif
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index 7e80caa05060..91c3574ebed3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -30,6 +30,7 @@
#include "soc15d.h"
#include "v11_structs.h"
#include "soc21.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -78,7 +79,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -89,7 +90,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
}
static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
- unsigned int vmid)
+ unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
@@ -101,7 +102,8 @@ static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int p
return 0;
}
-static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
+static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -162,7 +164,7 @@ static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t inst)
{
struct v11_compute_mqd *m;
uint32_t *mqd_hqd;
@@ -258,9 +260,9 @@ static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v11_compute_mqd *m;
uint32_t mec, pipe;
int r;
@@ -275,7 +277,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -302,7 +304,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
release_queue(adev);
return r;
@@ -310,7 +312,7 @@ out_unlock:
static int hqd_dump_v11(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -445,7 +447,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
}
static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
+ uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -486,7 +488,7 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -571,7 +573,7 @@ static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
static int wave_control_execute_v11(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
@@ -606,6 +608,183 @@ static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4))
+ *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v11,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
@@ -622,4 +801,11 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.wave_control_execute = wave_control_execute_v11,
.get_atc_vmid_pasid_mapping_info = NULL,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
+ .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v11_set_address_watch,
+ .clear_address_watch = kgd_gfx_v11_clear_address_watch
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index e83cb1c09610..6bf448ab3dff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -78,7 +78,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
- unsigned int vmid)
+ unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -114,7 +114,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -158,7 +159,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
- uint32_t wptr_mask, struct mm_struct *mm)
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct cik_mqd *m;
uint32_t *mqd_hqd;
@@ -202,7 +203,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
@@ -318,7 +319,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -358,7 +359,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t temp;
enum hqd_dequeue_request_type type;
@@ -494,7 +495,7 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
uint32_t data;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 870f352837fc..cd06e4a6d1da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -72,7 +72,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
@@ -85,7 +85,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
- unsigned int vmid)
+ unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -109,7 +109,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
return 0;
}
-static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -153,7 +154,7 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
- uint32_t wptr_mask, struct mm_struct *mm)
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct vi_mqd *m;
uint32_t *mqd_hqd;
@@ -226,7 +227,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
@@ -350,7 +351,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
@@ -390,7 +391,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t temp;
enum hqd_dequeue_request_type type;
@@ -540,7 +541,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e92b93557c13..51d93fb13ea3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -38,6 +38,7 @@
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"
+#include <uapi/linux/kfd_ioctl.h>
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -46,29 +47,29 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
- uint32_t queue, uint32_t vmid)
+static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid, uint32_t inst)
{
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, mec, pipe, queue, vmid);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
}
-static void unlock_srbm(struct amdgpu_device *adev)
+static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
{
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
mutex_unlock(&adev->srbm_mutex);
}
-static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
- uint32_t queue_id)
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
{
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(adev, mec, pipe, queue_id, 0);
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
}
-static uint64_t get_queue_mask(struct amdgpu_device *adev,
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
@@ -77,28 +78,28 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
return 1ull << bit;
}
-static void release_queue(struct amdgpu_device *adev)
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
{
- unlock_srbm(adev);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
+ uint32_t sh_mem_bases, uint32_t inst)
{
- lock_srbm(adev, 0, 0, 0, vmid);
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
- unlock_srbm(adev);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
- unsigned int vmid)
+ unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
@@ -156,7 +157,8 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
* but still works
*/
-int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
@@ -164,13 +166,13 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
- lock_srbm(adev, mec, pipe, 0, 0);
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
- WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
- unlock_srbm(adev);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
return 0;
}
@@ -220,7 +222,8 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
- uint32_t wptr_mask, struct mm_struct *mm)
+ uint32_t wptr_mask, struct mm_struct *mm,
+ uint32_t inst)
{
struct v9_mqd *m;
uint32_t *mqd_hqd;
@@ -228,21 +231,22 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
m = get_mqd(mqd);
- acquire_queue(adev, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
- hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
for (reg = hqd_base;
- reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL),
+ data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -271,43 +275,43 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
- WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
- (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data);
- release_queue(adev);
+ kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off)
+ uint32_t doorbell_off, uint32_t inst)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
struct v9_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
- acquire_queue(adev, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
@@ -315,7 +319,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
@@ -342,15 +346,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
amdgpu_ring_commit(kiq_ring);
out_unlock:
- spin_unlock(&adev->gfx.kiq.ring_lock);
- release_queue(adev);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ kgd_gfx_v9_release_queue(adev, inst);
return r;
}
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs)
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
@@ -365,13 +369,13 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
if (*dump == NULL)
return -ENOMEM;
- acquire_queue(adev, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
- for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
- reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
- release_queue(adev);
+ kgd_gfx_v9_release_queue(adev, inst);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -481,23 +485,23 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
uint32_t low, high;
- acquire_queue(adev, pipe_id, queue_id);
- act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
- if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
- high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
+ if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
retval = true;
}
- release_queue(adev);
+ kgd_gfx_v9_release_queue(adev, inst);
return retval;
}
@@ -522,7 +526,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
+ uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
@@ -532,10 +536,10 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
if (amdgpu_in_reset(adev))
return -EIO;
- acquire_queue(adev, pipe_id, queue_id);
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
if (m->cp_hqd_vmid == 0)
- WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+ WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -552,22 +556,22 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
break;
}
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
- temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
- release_queue(adev);
+ kgd_gfx_v9_release_queue(adev, inst);
return -ETIME;
}
usleep_range(500, 1000);
}
- release_queue(adev);
+ kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
@@ -624,14 +628,14 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd)
+ uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
- WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
@@ -640,12 +644,271 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
+/*
+ * GFX9 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~96 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because GFX9.4.1 cannot support multi-process debugging due to trap
+ * configuration and masking being limited to global scope. Always assume
+ * single process conditions.
+ */
+#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall)
+{
+ int i;
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
+ stall ? 1 : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base)
{
@@ -682,10 +945,11 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
* @queue_idx: Index of queue in the queue-map bit-field
* @wave_cnt: Output parameter updated with number of waves in flight
* @vmid: Output parameter updated with VMID of queue whose wave count
- * is being collected
+ * is being collected
+ * @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
- int *wave_cnt, int *vmid)
+ int *wave_cnt, int *vmid, uint32_t inst)
{
int pipe_idx;
int queue_slot;
@@ -700,12 +964,12 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
*wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
- soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
- reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
- *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
+ *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
}
@@ -718,9 +982,10 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* @adev: Handle of device from which to get number of waves in flight
* @pasid: Identifies the process for which this query call is invoked
* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
- * belong to process with given pasid
+ * belong to process with given pasid
* @max_waves_per_cu: Output parameter updated with maximum number of waves
- * possible per Compute Unit
+ * possible per Compute Unit
+ * @inst: xcc's instance number on a multi-XCC setup
*
* Note: It's possible that the device has too many queues (oversubscription)
* in which case a VMID could be remapped to a different PASID. This could lead
@@ -756,7 +1021,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* Reading registers referenced above involves programming GRBM appropriately
*/
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu)
+ int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
{
int qidx;
int vmid;
@@ -772,13 +1037,13 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
lock_spi_csq_mutexes(adev);
- soc15_grbm_select(adev, 1, 0, 0, 0);
+ soc15_grbm_select(adev, 1, 0, 0, 0, inst);
/*
* Iterate through the shader engines and arrays of the device
* to get number of waves in flight
*/
- bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
+ bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
KGD_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
@@ -787,8 +1052,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
- amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
- queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);
+ amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
/*
* Assumption: queue map encodes following schema: four
@@ -808,10 +1073,11 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
continue;
/* Get number of waves in flight and aggregate them */
- get_wave_count(adev, qidx, &wave_cnt, &vmid);
+ get_wave_count(adev, qidx, &wave_cnt, &vmid,
+ inst);
if (wave_cnt != 0) {
pasid_tmp =
- RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
+ RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
mmIH_VMID_0_LUT) + vmid);
if (pasid_tmp == pasid)
vmid_wave_cnt += wave_cnt;
@@ -820,8 +1086,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
}
}
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, inst);
unlock_spi_csq_mutexes(adev);
/* Update the output parameters and return */
@@ -830,28 +1096,51 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
adev->gfx.cu_info.max_waves_per_simd;
}
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ /*
+ * The CP cannont handle a 0 grace period input and will result in
+ * an infinite grace period being set so set to 1 to prevent this.
+ */
+ if (grace_period == 0)
+ grace_period = 1;
+
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ grace_period);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
{
- lock_srbm(adev, 0, 0, 0, vmid);
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
/*
* Program TBA registers
*/
- WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
lower_32_bits(tba_addr >> 8));
- WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
upper_32_bits(tba_addr >> 8));
/*
* Program TMA registers
*/
- WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8));
- WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI,
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
upper_32_bits(tma_addr >> 8));
- unlock_srbm(adev);
+ kgd_gfx_v9_unlock_srbm(adev, inst);
}
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
@@ -871,6 +1160,15 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index c7ed3bc9053c..5f54bff0db49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -20,41 +20,81 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
+ uint32_t sh_mem_bases, uint32_t inst);
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
- unsigned int vmid);
-int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id);
+ unsigned int vmid, uint32_t inst);
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst);
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
+ struct mm_struct *mm, uint32_t inst);
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t doorbell_off);
+ uint32_t doorbell_off, uint32_t inst);
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id);
+ uint32_t queue_id, uint32_t inst);
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
+ uint32_t queue_id, uint32_t inst);
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
- uint32_t sq_cmd);
+ uint32_t sq_cmd, uint32_t inst);
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
-
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu);
+ int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
- uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst);
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id);
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall);
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid);
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 83a83ced2439..f61527b800e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -35,7 +35,9 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
#include "kfd_smi_events.h"
+#include <drm/ttm/ttm_tt.h>
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
@@ -110,13 +112,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
struct sysinfo si;
uint64_t mem;
+ if (kfd_mem_limit.max_system_mem_limit)
+ return;
+
si_meminfo(&si);
mem = si.freeram - si.freehigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
- kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+ kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
@@ -148,16 +153,20 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* @size: Size of buffer, in bytes, encapsulated by B0. This should be
* equivalent to amdgpu_bo_size(BO)
* @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
+ * managed as one compute node in driver for app
*
- * Return: returns -ENOMEM in case of error, ZERO otherwise
+ * Return:
+ * returns -ENOMEM in case of error, ZERO otherwise
*/
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 alloc_flag)
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
+ uint64_t vram_size = 0;
system_mem_needed = 0;
ttm_mem_needed = 0;
@@ -172,6 +181,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
* 2M BO chunk.
*/
vram_needed = size;
+ /*
+ * For GFX 9.4.3, get the VRAM size from XCP structs
+ */
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ return -EINVAL;
+
+ vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
+ if (adev->gmc.is_app_apu) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ }
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = size;
} else if (!(alloc_flag &
@@ -191,8 +211,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
- (adev && adev->kfd.vram_used + vram_needed >
- adev->gmc.real_vram_size - reserved_for_pt)) {
+ (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
+ vram_size - reserved_for_pt)) {
ret = -ENOMEM;
goto release;
}
@@ -202,9 +222,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
*/
WARN_ONCE(vram_needed && !adev,
"adev reference can't be null when vram is used");
- if (adev) {
- adev->kfd.vram_used += vram_needed;
- adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ if (adev && xcp_id >= 0) {
+ adev->kfd.vram_used[xcp_id] += vram_needed;
+ adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+ vram_needed :
+ ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
}
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -215,7 +237,7 @@ release:
}
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 alloc_flag)
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -225,9 +247,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
WARN_ONCE(!adev,
"adev reference can't be null when alloc mem flags vram is set");
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ goto release;
+
if (adev) {
- adev->kfd.vram_used -= size;
- adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ adev->kfd.vram_used[xcp_id] -= size;
+ if (adev->gmc.is_app_apu) {
+ adev->kfd.vram_used_aligned[xcp_id] -= size;
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else {
+ adev->kfd.vram_used_aligned[xcp_id] -=
+ ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
}
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size;
@@ -237,8 +269,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release;
}
- WARN_ONCE(adev && adev->kfd.vram_used < 0,
- "KFD VRAM memory accounting unbalanced");
+ WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+ "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
"KFD TTM memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
@@ -254,14 +286,16 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
u32 alloc_flags = bo->kfd_bo->alloc_flags;
u64 size = amdgpu_bo_size(bo);
- amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+ bo->xcp_id);
kfree(bo->kfd_bo);
}
/**
- * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
+ * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information
* about USERPTR or DOOREBELL or MMIO BO.
+ *
* @adev: Device for which dmamap BO is being created
* @mem: BO of peer device that is being DMA mapped. Provides parameters
* in building the dmamap BO
@@ -285,7 +319,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
- ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+ ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
amdgpu_bo_unreserve(mem->bo);
@@ -527,6 +561,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ int ret;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -659,11 +699,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
static void
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
{
- struct ttm_operation_ctx ctx = {.interruptible = true};
- struct amdgpu_bo *bo = attachment->bo_va->base.bo;
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ /* This is a no-op. We don't want to trigger eviction fences when
+ * unmapping DMABufs. Therefore the invalidation (moving to system
+ * domain) is done in kfd_mem_dmamap_dmabuf.
+ */
}
/**
@@ -804,7 +843,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
* if peer device has large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of peer device
*/
- if ((adev != bo_adev) &&
+ if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1599,23 +1638,42 @@ out_unlock:
return ret;
}
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
ssize_t available;
+ uint64_t vram_available, system_mem_available, ttm_mem_available;
spin_lock(&kfd_mem_limit.mem_limit_lock);
- available = adev->gmc.real_vram_size
- - adev->kfd.vram_used_aligned
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
- atomic64_read(&adev->vram_pin_size)
- reserved_for_pt;
+
+ if (adev->gmc.is_app_apu) {
+ system_mem_available = no_system_mem_limit ?
+ kfd_mem_limit.max_system_mem_limit :
+ kfd_mem_limit.max_system_mem_limit -
+ kfd_mem_limit.system_mem_used;
+
+ ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+ kfd_mem_limit.ttm_mem_used;
+
+ available = min3(system_mem_available, ttm_mem_available,
+ vram_available);
+ available = ALIGN_DOWN(available, PAGE_SIZE);
+ } else {
+ available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+ }
+
spin_unlock(&kfd_mem_limit.mem_limit_lock);
if (available < 0)
available = 0;
- return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
+ return available;
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@@ -1624,6 +1682,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
uint64_t *offset, uint32_t flags, bool criu_resume)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
@@ -1631,6 +1690,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
uint64_t aligned_size;
+ int8_t xcp_id = -1;
u64 alloc_flags;
int ret;
@@ -1639,9 +1699,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
- alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+
+ if (adev->gmc.is_app_apu) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+ }
+ xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
@@ -1693,17 +1761,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
amdgpu_sync_create(&(*mem)->sync);
- ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+ xcp_id);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}
- pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
- va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
+ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+ va, (*mem)->aql_queue ? size << 1 : size,
+ domain_string(alloc_domain), xcp_id);
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
- bo_type, NULL, &gobj);
+ bo_type, NULL, &gobj, xcp_id + 1);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
@@ -1728,6 +1798,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
(*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
+
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
@@ -1759,7 +1830,7 @@ err_node_allow:
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
- amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
@@ -1855,11 +1926,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Update the size of the BO being freed if it was allocated from
- * VRAM and is not imported.
+ * VRAM and is not imported. For APP APU VRAM allocations are done
+ * in GTT domain
*/
if (size) {
- if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
- (!is_imported))
+ if (!is_imported &&
+ (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
+ (adev->gmc.is_app_apu &&
+ mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size;
else
*size = 0;
@@ -2282,8 +2356,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
(*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
- (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
@@ -2445,7 +2520,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
ret = -EAGAIN;
goto unlock_out;
}
- mem->invalid = 0;
+ /* set mem valid if mem has hmm range associated */
+ if (mem->range)
+ mem->invalid = 0;
}
unlock_out:
@@ -2577,8 +2654,15 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list,
validate_list.head) {
- bool valid = amdgpu_ttm_tt_get_user_pages_done(
- mem->bo->tbo.ttm, mem->range);
+ bool valid;
+
+ /* keep mem without hmm range at userptr_inval_list */
+ if (!mem->range)
+ continue;
+
+ /* Only check mem with hmm range associated */
+ valid = amdgpu_ttm_tt_get_user_pages_done(
+ mem->bo->tbo.ttm, mem->range);
mem->range = NULL;
if (!valid) {
@@ -2586,7 +2670,12 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
ret = -EAGAIN;
continue;
}
- WARN(mem->invalid, "Valid BO is marked invalid");
+
+ if (mem->invalid) {
+ WARN(1, "Valid BO is marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index ac6fe0ae4609..ef4b9a41f20a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -272,6 +272,7 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
break;
case ATOM_DGPU_VRAM_TYPE_HBM2:
case ATOM_DGPU_VRAM_TYPE_HBM2E:
+ case ATOM_DGPU_VRAM_TYPE_HBM3:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
case ATOM_DGPU_VRAM_TYPE_GDDR6:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 30c28a69e847..b582b83c4984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -104,9 +104,8 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0);
bios = ioremap_wc(vram_base, size);
- if (!bios) {
+ if (!bios)
return false;
- }
adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) {
@@ -133,9 +132,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
adev->bios = NULL;
/* XXX: some cards may return 0 for rom size? ddx has a workaround */
bios = pci_map_rom(adev->pdev, &size);
- if (!bios) {
+ if (!bios)
return false;
- }
adev->bios = kzalloc(size, GFP_KERNEL);
if (adev->bios == NULL) {
@@ -168,9 +166,9 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
header[AMD_VBIOS_SIGNATURE_END] = 0;
if ((!AMD_IS_VALID_VBIOS(header)) ||
- 0 != memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
- AMD_VBIOS_SIGNATURE,
- strlen(AMD_VBIOS_SIGNATURE)))
+ memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
+ AMD_VBIOS_SIGNATURE,
+ strlen(AMD_VBIOS_SIGNATURE)) != 0)
return false;
/* valid vbios, go on */
@@ -264,7 +262,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
if (ACPI_FAILURE(status)) {
- printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+ DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
return -ENODEV;
}
@@ -363,7 +361,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
struct acpi_table_header *hdr;
acpi_size tbl_size;
UEFI_ACPI_VFCT *vfct;
- unsigned offset;
+ unsigned int offset;
if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 6be30dcb029d..d34037b85cf8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -593,11 +593,20 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
switch (val) {
default:
- case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
- case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
- case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
- case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
}
+
if (amdgpu_encoder->rmx_type == rmx_type)
return 0;
@@ -799,12 +808,21 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
}
switch (value) {
- case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
- case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
- case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
default:
- case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
}
+
if (amdgpu_encoder->rmx_type == rmx_type)
return 0;
@@ -1127,7 +1145,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* assume digital unless load detected otherwise */
amdgpu_connector->use_digital = true;
lret = encoder_funcs->detect(encoder, connector);
- DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret);
+ DRM_DEBUG_KMS("load_detect %x returned: %x\n",
+ encoder->encoder_type, lret);
if (lret == connector_status_connected)
amdgpu_connector->use_digital = false;
}
@@ -1991,7 +2010,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
if (i2c_bus->valid) {
connector->polled = DRM_CONNECTOR_POLL_CONNECT |
- DRM_CONNECTOR_POLL_DISCONNECT;
+ DRM_CONNECTOR_POLL_DISCONNECT;
}
} else
connector->polled = DRM_CONNECTOR_POLL_HPD;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2eb2c66843a8..d9503882ea97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -112,6 +112,9 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
if (r < 0)
return r;
+ if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+ return -EINVAL;
+
++(num_ibs[r]);
p->gang_leader_idx = r;
return 0;
@@ -192,7 +195,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
uint64_t *chunk_array_user;
uint64_t *chunk_array;
uint32_t uf_offset = 0;
- unsigned int size;
+ size_t size;
int ret;
int i;
@@ -285,6 +288,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
default:
@@ -305,7 +309,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
p->gang_leader = p->jobs[p->gang_leader_idx];
- if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
+ if (p->ctx->generation != p->gang_leader->generation) {
ret = -ECANCELED;
goto free_all_kdata;
}
@@ -393,7 +397,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
{
struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- unsigned num_deps;
+ unsigned int num_deps;
int i, r;
num_deps = chunk->length_dw * 4 /
@@ -464,7 +468,7 @@ static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
- unsigned num_deps;
+ unsigned int num_deps;
int i, r;
num_deps = chunk->length_dw * 4 /
@@ -482,7 +486,7 @@ static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
- unsigned num_deps;
+ unsigned int num_deps;
int i, r;
num_deps = chunk->length_dw * 4 /
@@ -502,7 +506,7 @@ static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
- unsigned num_deps;
+ unsigned int num_deps;
int i;
num_deps = chunk->length_dw * 4 /
@@ -536,7 +540,7 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
- unsigned num_deps;
+ unsigned int num_deps;
int i;
num_deps = chunk->length_dw * 4 /
@@ -575,6 +579,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
return 0;
}
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ int i;
+
+ if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+ return -EINVAL;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;
+ p->jobs[i]->init_shadow =
+ shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+ }
+
+ return 0;
+}
+
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -617,6 +641,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ r = amdgpu_cs_p2_shadow(p, chunk);
+ if (r)
+ return r;
+ break;
}
}
@@ -729,6 +758,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram;
+
adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
increment_us, us_upper_bound);
@@ -1047,9 +1077,8 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
+ if (r)
return r;
- }
kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
@@ -1356,7 +1385,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
- unsigned i;
+ unsigned int i;
amdgpu_sync_free(&parser->sync);
for (i = 0; i < parser->num_post_deps; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index c6d4d41c4393..23d054526e7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -106,3 +106,41 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}
+
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr)
+{
+ struct ww_acquire_ctx ticket;
+ struct list_head list;
+ struct amdgpu_bo_list_entry pd;
+ struct ttm_validate_buffer csa_tv;
+ int r;
+
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&csa_tv.head);
+ csa_tv.bo = &bo->tbo;
+ csa_tv.num_shared = 1;
+
+ list_add(&csa_tv.head, &list);
+ amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+ if (r) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ return r;
+ }
+
+ r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+ if (r) {
+ DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+ ttm_eu_backoff_reservation(&ticket, &list);
+ return r;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+
+ ttm_eu_backoff_reservation(&ticket, &list);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
index 524b4437a021..7dfc1f2012eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size);
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr);
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d2139ac12159..0dc9c655c4fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -222,8 +222,19 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
- scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
- num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+ if (!(adev)->xcp_mgr) {
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ } else {
+ struct amdgpu_fpriv *fpriv;
+
+ fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
+ r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
+ &num_scheds, &scheds);
+ if (r)
+ goto cleanup_entity;
+ }
/* disable load balance if the hw engine retains context among dependent jobs */
if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
@@ -255,7 +266,8 @@ error_free_entity:
return r;
}
-static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
{
ktime_t res = ns_to_ktime(0);
int i;
@@ -268,6 +280,8 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
dma_fence_put(entity->fences[i]);
}
+ amdgpu_xcp_release_sched(adev, entity);
+
kfree(entity);
return res;
}
@@ -303,6 +317,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
struct drm_file *filp, struct amdgpu_ctx *ctx)
{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
u32 current_stable_pstate;
int r;
@@ -318,7 +333,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
+ ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
ctx->init_priority = priority;
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
@@ -331,6 +346,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
else
ctx->stable_pstate = current_stable_pstate;
+ ctx->ctx_mgr = &(fpriv->ctx_mgr);
return 0;
}
@@ -399,7 +415,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
ktime_t spend;
- spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+ spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
}
}
@@ -416,6 +432,7 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
int r;
+ struct drm_sched_entity *ctx_entity;
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
@@ -439,7 +456,14 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return r;
}
- *entity = &ctx->entities[hw_ip][ring]->entity;
+ ctx_entity = &ctx->entities[hw_ip][ring]->entity;
+ r = drm_sched_entity_error(ctx_entity);
+ if (r) {
+ DRM_DEBUG("error entity %p\n", ctx_entity);
+ return r;
+ }
+
+ *entity = ctx_entity;
return 0;
}
@@ -570,12 +594,15 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
- if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+ if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
+ if (amdgpu_in_reset(adev))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
+
if (adev->ras_enabled && con) {
/* Return the cached values in O(1),
* and schedule delayed work to cache
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..85376baaa92f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -47,7 +47,7 @@ struct amdgpu_ctx {
struct amdgpu_ctx_mgr *mgr;
unsigned reset_counter;
unsigned reset_counter_query;
- uint32_t vram_lost_counter;
+ uint64_t generation;
spinlock_t ring_lock;
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
uint32_t stable_pstate;
+ struct amdgpu_ctx_mgr *ctx_mgr;
};
struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f60753f97ac5..56e89e76ff17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -56,14 +56,14 @@
*
* Bit 62: Indicates a GRBM bank switch is needed
* Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
- * zero)
+ * zero)
* Bits 24..33: The SE or ME selector if needed
* Bits 34..43: The SH (or SA) or PIPE selector if needed
* Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
*
* Bit 23: Indicates that the PM power gating lock should be held
- * This is necessary to read registers that might be
- * unreliable during a power gating transistion.
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transistion.
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
@@ -76,7 +76,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
ssize_t result = 0;
int r;
bool pm_pg_lock, use_bank, use_ring;
- unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
+ unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
pm_pg_lock = use_bank = use_ring = false;
instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
@@ -136,10 +136,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
}
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se_bank,
- sh_bank, instance_bank);
+ sh_bank, instance_bank, 0);
} else if (use_ring) {
mutex_lock(&adev->srbm_mutex);
- amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
+ amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
}
if (pm_pg_lock)
@@ -169,10 +169,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
end:
if (use_bank) {
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
} else if (use_ring) {
- amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -208,7 +208,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
{
struct amdgpu_debugfs_regs2_data *rd;
- rd = kzalloc(sizeof *rd, GFP_KERNEL);
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
if (!rd)
return -ENOMEM;
rd->adev = file_inode(file)->i_private;
@@ -221,6 +221,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
{
struct amdgpu_debugfs_regs2_data *rd = file->private_data;
+
mutex_destroy(&rd->lock);
kfree(file->private_data);
return 0;
@@ -262,14 +263,14 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
}
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
- rd->id.grbm.sh,
- rd->id.grbm.instance);
+ rd->id.grbm.sh,
+ rd->id.grbm.instance, rd->id.xcc_id);
}
if (rd->id.use_srbm) {
mutex_lock(&adev->srbm_mutex);
amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
- rd->id.srbm.queue, rd->id.srbm.vmid);
+ rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
}
if (rd->id.pg_lock)
@@ -295,12 +296,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
}
end:
if (rd->id.use_grbm) {
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
}
if (rd->id.use_srbm) {
- amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
mutex_unlock(&adev->srbm_mutex);
}
@@ -319,18 +320,45 @@ end:
static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
{
struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_debugfs_regs2_iocdata v1_data;
int r;
+ mutex_lock(&rd->lock);
+
switch (cmd) {
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
+ r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
+ sizeof(rd->id));
+ if (r)
+ r = -EINVAL;
+ goto done;
case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
- mutex_lock(&rd->lock);
- r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->id);
- mutex_unlock(&rd->lock);
- return r ? -EINVAL : 0;
+ r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
+ sizeof(v1_data));
+ if (r) {
+ r = -EINVAL;
+ goto done;
+ }
+ goto v1_copy;
default:
- return -EINVAL;
- }
- return 0;
+ r = -EINVAL;
+ goto done;
+ }
+
+v1_copy:
+ rd->id.use_srbm = v1_data.use_srbm;
+ rd->id.use_grbm = v1_data.use_grbm;
+ rd->id.pg_lock = v1_data.pg_lock;
+ rd->id.grbm.se = v1_data.grbm.se;
+ rd->id.grbm.sh = v1_data.grbm.sh;
+ rd->id.grbm.instance = v1_data.grbm.instance;
+ rd->id.srbm.me = v1_data.srbm.me;
+ rd->id.srbm.pipe = v1_data.srbm.pipe;
+ rd->id.srbm.queue = v1_data.srbm.queue;
+ rd->id.xcc_id = 0;
+done:
+ mutex_unlock(&rd->lock);
+ return r;
}
static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
@@ -343,6 +371,136 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf
return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
}
+static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd;
+
+ rd = kzalloc(sizeof *rd, GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t *data, x;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -ENOMEM;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
+
+ if (!rd->id.gpr_or_wave) {
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
+ } else {
+ x = size >> 2;
+ if (rd->id.gpr.vpgr_or_sgpr) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ result = -EINVAL;
+ goto done;
+ }
+
+ while (size && (*pos < x * 4)) {
+ uint32_t value;
+
+ value = data[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto done;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+done:
+ amdgpu_virt_disable_access_debugfs(adev);
+ kfree(data);
+ return result;
+}
+
+static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ int r = 0;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
+ if (copy_from_user(&rd->id,
+ (struct amdgpu_debugfs_gprwave_iocdata *)data,
+ sizeof(rd->id)))
+ r = -EFAULT;
+ goto done;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+
+
/**
* amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
@@ -863,7 +1021,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
* The offset being sought changes which wave that the status data
* will be returned for. The bits are used as follows:
*
- * Bits 0..6: Byte offset into data
+ * Bits 0..6: Byte offset into data
* Bits 7..14: SE selector
* Bits 15..22: SH/SA selector
* Bits 23..30: CU/{WGP+SIMD} selector
@@ -907,13 +1065,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
- amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
x = 0;
if (adev->gfx.funcs->read_wave_data)
- adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
+ adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
- amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -1001,17 +1159,17 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
- amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
if (bank == 0) {
if (adev->gfx.funcs->read_wave_vgprs)
- adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
+ adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
} else {
if (adev->gfx.funcs->read_wave_sgprs)
- adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
+ adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
}
- amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -1339,6 +1497,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = {
.llseek = default_llseek
};
+static const struct file_operations amdgpu_debugfs_gprwave_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
+ .read = amdgpu_debugfs_gprwave_read,
+ .open = amdgpu_debugfs_gprwave_open,
+ .release = amdgpu_debugfs_gprwave_release,
+ .llseek = default_llseek
+};
+
static const struct file_operations amdgpu_debugfs_regs_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_regs_read,
@@ -1416,6 +1583,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops,
+ &amdgpu_debugfs_gprwave_fops,
&amdgpu_debugfs_regs_didt_fops,
&amdgpu_debugfs_regs_pcie_fops,
&amdgpu_debugfs_regs_smc_fops,
@@ -1429,9 +1597,10 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_gfxoff_residency_fops,
};
-static const char *debugfs_regs_names[] = {
+static const char * const debugfs_regs_names[] = {
"amdgpu_regs",
"amdgpu_regs2",
+ "amdgpu_gprwave",
"amdgpu_regs_didt",
"amdgpu_regs_pcie",
"amdgpu_regs_smc",
@@ -1447,7 +1616,7 @@ static const char *debugfs_regs_names[] = {
/**
* amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
- * register access.
+ * register access.
*
* @adev: The device to attach the debugfs entries to
*/
@@ -1459,7 +1628,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
- S_IFREG | S_IRUGO, root,
+ S_IFREG | 0444, root,
adev, debugfs_regs[i]);
if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
@@ -1470,7 +1639,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev);
int r = 0, i;
@@ -1494,12 +1663,12 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
kthread_park(ring->sched.thread);
}
- seq_printf(m, "run ib test:\n");
+ seq_puts(m, "run ib test:\n");
r = amdgpu_ib_ring_tests(adev);
if (r)
seq_printf(m, "ib ring tests failed (%d).\n", r);
else
- seq_printf(m, "ib ring tests passed.\n");
+ seq_puts(m, "ib ring tests passed.\n");
/* go on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -1581,7 +1750,7 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val)
static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file;
int r;
@@ -1978,7 +2147,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_ring_init(adev, ring);
}
- for ( i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (!amdgpu_vcnfw_log)
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5c7d40873ee2..e25f085ee886 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -707,6 +707,48 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
return r;
}
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ u32 r;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
/**
* amdgpu_device_indirect_rreg64 - read a 64bits indirect register
*
@@ -747,8 +789,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
* amdgpu_device_indirect_wreg - write an indirect register address
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register offset
* @reg_data: indirect register data
*
@@ -774,12 +814,50 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
/**
* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
*
* @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
* @reg_addr: indirect register offset
* @reg_data: indirect register data
*
@@ -840,6 +918,13 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
return 0;
}
+static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
+ BUG();
+ return 0;
+}
+
/**
* amdgpu_invalid_wreg - dummy reg write function
*
@@ -857,6 +942,13 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
BUG();
}
+static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
+{
+ DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
+ reg, v);
+ BUG();
+}
+
/**
* amdgpu_invalid_rreg64 - dummy 64 bit reg read function
*
@@ -942,7 +1034,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
amdgpu_asic_pre_asic_init(adev);
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
+ adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
return amdgpu_atomfirmware_asic_init(adev, true);
else
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -998,7 +1091,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
if (array_size % 3)
return;
- for (i = 0; i < array_size; i +=3) {
+ for (i = 0; i < array_size; i += 3) {
reg = registers[i + 0];
and_mask = registers[i + 1];
or_mask = registers[i + 2];
@@ -1090,7 +1183,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
* doorbells are in the first page. So with paging queue enabled,
* the max num_kernel_doorbells should + 1 page (0x400 in dword)
*/
- if (adev->asic_type >= CHIP_VEGA10)
+ if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
+ adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
adev->doorbell.num_kernel_doorbells += 0x400;
}
@@ -1291,6 +1385,15 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
return 0;
}
+static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
+{
+ if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) {
+ return false;
+ }
+
+ return true;
+}
+
/*
* GPU helpers function.
*/
@@ -1310,6 +1413,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return false;
+ if (!amdgpu_device_read_bios(adev))
+ return false;
+
if (amdgpu_passthrough(adev)) {
/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
* some old smc fw still need driver do vPost otherwise gpu hang, while
@@ -1547,7 +1653,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
amdgpu_sched_jobs = 4;
- } else if (!is_power_of_2(amdgpu_sched_jobs)){
+ } else if (!is_power_of_2(amdgpu_sched_jobs)) {
dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
@@ -2194,7 +2300,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
- DRM_ERROR("disabled ip block: %d <%s>\n",
+ DRM_WARN("disabled ip block: %d <%s>\n",
i, adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
} else {
@@ -2220,14 +2326,16 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return r;
/* Read BIOS */
- if (!amdgpu_get_bios(adev))
- return -EINVAL;
+ if (amdgpu_device_read_bios(adev)) {
+ if (!amdgpu_get_bios(adev))
+ return -EINVAL;
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- return r;
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+ return r;
+ }
}
/*get pf2vf msg info at it's earliest time*/
@@ -2376,6 +2484,8 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
}
}
+ amdgpu_xcp_update_partition_sched_list(adev);
+
return 0;
}
@@ -2533,8 +2643,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
goto init_failed;
/* Don't init kfd if whole hive need to be reset during init */
- if (!adev->gmc.xgmi.pending_reset)
+ if (!adev->gmc.xgmi.pending_reset) {
+ kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
+ }
amdgpu_fru_get_product_info(adev);
@@ -2759,8 +2871,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
- if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
- adev->asic_type == CHIP_ALDEBARAN ))
+ if (amdgpu_passthrough(adev) &&
+ ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+ adev->asic_type == CHIP_ALDEBARAN))
amdgpu_dpm_handle_passthrough_sbr(adev, true);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -3089,7 +3202,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
}
adev->ip_blocks[i].status.hw = false;
/* handle putting the SMC in the appropriate state */
- if(!amdgpu_sriov_vf(adev)){
+ if (!amdgpu_sriov_vf(adev)) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
if (r) {
@@ -3608,6 +3721,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->smc_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg = &amdgpu_invalid_rreg;
adev->pcie_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
@@ -3633,6 +3748,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->gfx.gfx_off_mutex);
+ mutex_init(&adev->gfx.partition_mutex);
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
@@ -3708,8 +3824,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
- amdgpu_device_get_pcie_info(adev);
-
if (amdgpu_mcbp)
DRM_INFO("MCBP is enabled\n");
@@ -3725,6 +3839,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
+ amdgpu_device_get_pcie_info(adev);
+
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
@@ -3753,21 +3869,24 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
/* enable PCIE atomic ops */
- if (amdgpu_sriov_vf(adev))
- adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
- adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
- (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->virt.fw_reserve.p_pf2vf)
+ adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
+ (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
* internal path natively support atomics, set have_atomics_support to true.
*/
- else if ((adev->flags & AMD_IS_APU) &&
- (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
+ } else if ((adev->flags & AMD_IS_APU) &&
+ (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
adev->have_atomics_support = true;
- else
+ } else {
adev->have_atomics_support =
!pci_enable_atomic_ops_to_root(adev->pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ }
+
if (!adev->have_atomics_support)
dev_info(adev->dev, "PCIE atomic ops is not supported\n");
@@ -3783,7 +3902,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_reset_init(adev);
/* detect if we are with an SRIOV vbios */
- amdgpu_device_detect_sriov_bios(adev);
+ if (adev->bios)
+ amdgpu_device_detect_sriov_bios(adev);
/* check if we need to reset the asic
* E.g., driver was not cleanly unloaded previously, etc.
@@ -3835,25 +3955,27 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
- if (adev->is_atom_fw) {
- /* Initialize clocks */
- r = amdgpu_atomfirmware_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
- goto failed;
- }
- } else {
- /* Initialize clocks */
- r = amdgpu_atombios_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
- goto failed;
+ if (adev->bios) {
+ if (adev->is_atom_fw) {
+ /* Initialize clocks */
+ r = amdgpu_atomfirmware_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ } else {
+ /* Initialize clocks */
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ /* init i2c buses */
+ if (!amdgpu_device_has_dc_support(adev))
+ amdgpu_atombios_i2c_init(adev);
}
- /* init i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_atombios_i2c_init(adev);
}
fence_driver_init:
@@ -4019,7 +4141,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = NULL;
/* Memory manager related */
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
@@ -4049,7 +4171,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
- if (adev->mode_info.mode_config_initialized){
+ if (adev->mode_info.mode_config_initialized) {
if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
drm_helper_force_disable_all(adev_to_drm(adev));
else
@@ -4714,42 +4836,42 @@ disabled:
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
{
- u32 i;
- int ret = 0;
+ u32 i;
+ int ret = 0;
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
- dev_info(adev->dev, "GPU mode1 reset\n");
+ dev_info(adev->dev, "GPU mode1 reset\n");
- /* disable BM */
- pci_clear_master(adev->pdev);
+ /* disable BM */
+ pci_clear_master(adev->pdev);
- amdgpu_device_cache_pci_state(adev->pdev);
+ amdgpu_device_cache_pci_state(adev->pdev);
- if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
- dev_info(adev->dev, "GPU smu mode1 reset\n");
- ret = amdgpu_dpm_mode1_reset(adev);
- } else {
- dev_info(adev->dev, "GPU psp mode1 reset\n");
- ret = psp_gpu_reset(adev);
- }
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
- if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
- amdgpu_device_load_pci_state(adev->pdev);
+ amdgpu_device_load_pci_state(adev->pdev);
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio.funcs->get_memsize(adev);
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
- if (memsize != 0xffffffff)
- break;
- udelay(1);
- }
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
- return ret;
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+ return ret;
}
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
@@ -5478,7 +5600,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
/* covers APUs as well */
- if (pci_is_root_bus(adev->pdev->bus)) {
+ if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
if (adev->pm.pcie_gen_mask == 0)
adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
if (adev->pm.pcie_mlw_mask == 0)
@@ -5959,6 +6081,7 @@ void amdgpu_device_halt(struct amdgpu_device *adev)
struct pci_dev *pdev = adev->pdev;
struct drm_device *ddev = adev_to_drm(adev);
+ amdgpu_xcp_dev_unplug(adev);
drm_dev_unplug(ddev);
amdgpu_irq_disable_all(adev);
@@ -6079,3 +6202,31 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
return true;
}
}
+
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask)
+{
+ uint32_t ret = 0;
+ uint32_t old_ = 0;
+ uint32_t tmp_ = RREG32(reg_addr);
+ uint32_t loop = adev->usec_timeout;
+
+ while ((tmp_ & (mask)) != (expected_value)) {
+ if (old_ != tmp_) {
+ loop = adev->usec_timeout;
+ old_ = tmp_;
+ } else
+ udelay(1);
+ tmp_ = RREG32(reg_addr);
+ loop--;
+ if (!loop) {
+ DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
+ inst, reg_name, (uint32_t)expected_value,
+ (uint32_t)(tmp_ & (mask)));
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0ecce0b92b82..8e1cfc87122d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -30,6 +30,7 @@
#include "soc15.h"
#include "gfx_v9_0.h"
+#include "gfx_v9_4_3.h"
#include "gmc_v9_0.h"
#include "df_v1_7.h"
#include "df_v3_6.h"
@@ -76,12 +77,15 @@
#include "jpeg_v3_0.h"
#include "vcn_v4_0.h"
#include "jpeg_v4_0.h"
+#include "vcn_v4_0_3.h"
+#include "jpeg_v4_0_3.h"
#include "amdgpu_vkms.h"
#include "mes_v10_1.h"
#include "mes_v11_0.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#include "smuio_v13_0.h"
+#include "smuio_v13_0_3.h"
#include "smuio_v13_0_6.h"
#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
@@ -200,14 +204,44 @@ static int hw_id_map[MAX_HWIP] = {
[PCIE_HWIP] = PCIE_HWID,
};
-static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
+{
+ u64 tmr_offset, tmr_size, pos;
+ void *discv_regn;
+ int ret;
+
+ ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+ if (ret)
+ return ret;
+
+ pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+
+ /* This region is read-only and reserved from system use */
+ discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
+ if (discv_regn) {
+ memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
+ memunmap(discv_regn);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+ uint8_t *binary)
{
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+ int ret = 0;
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
- return 0;
+ if (vram_size) {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->mman.discovery_tmr_size, false);
+ } else {
+ ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+ }
+
+ return ret;
}
static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
@@ -280,6 +314,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
case 0xCF:
case 0xDF:
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
break;
default:
break;
@@ -301,33 +336,30 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (!adev->mman.discovery_bin)
return -ENOMEM;
- r = amdgpu_discovery_read_binary_from_vram(adev, adev->mman.discovery_bin);
- if (r) {
- dev_err(adev->dev, "failed to read ip discovery binary from vram\n");
- r = -EINVAL;
- goto out;
- }
-
- if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) {
- /* ignore the discovery binary from vram if discovery=2 in kernel module parameter */
- if (amdgpu_discovery == 2)
- dev_info(adev->dev,"force read ip discovery binary from file");
- else
- dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n");
-
- /* retry read ip discovery binary from file */
+ /* Read from file if it is the preferred option */
+ if (amdgpu_discovery == 2) {
+ dev_info(adev->dev, "use ip discovery information from file");
r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
+
if (r) {
dev_err(adev->dev, "failed to read ip discovery binary from file\n");
r = -EINVAL;
goto out;
}
- /* check the ip discovery binary signature */
- if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
- dev_warn(adev->dev, "get invalid ip discovery binary signature from file\n");
- r = -EINVAL;
+
+ } else {
+ r = amdgpu_discovery_read_binary_from_mem(
+ adev, adev->mman.discovery_bin);
+ if (r)
goto out;
- }
+ }
+
+ /* check the ip discovery binary signature */
+ if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
+ dev_err(adev->dev,
+ "get invalid ip discovery binary signature\n");
+ r = -EINVAL;
+ goto out;
}
bhdr = (struct binary_header *)adev->mman.discovery_bin;
@@ -471,11 +503,11 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev)
adev->mman.discovery_bin = NULL;
}
-static int amdgpu_discovery_validate_ip(const struct ip *ip)
+static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip)
{
- if (ip->number_instance >= HWIP_MAX_INSTANCE) {
- DRM_ERROR("Unexpected number_instance (%d) from ip discovery blob\n",
- ip->number_instance);
+ if (ip->instance_number >= HWIP_MAX_INSTANCE) {
+ DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n",
+ ip->instance_number);
return -EINVAL;
}
if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
@@ -493,7 +525,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
- struct ip *ip;
+ struct ip_v4 *ip;
uint16_t die_offset, ip_offset, num_dies, num_ips;
int i, j;
@@ -510,29 +542,41 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
if (amdgpu_discovery_validate_ip(ip))
goto next_ip;
- if (le16_to_cpu(ip->harvest) == 1) {
+ if (le16_to_cpu(ip->variant) == 1) {
switch (le16_to_cpu(ip->hw_id)) {
case VCN_HWID:
(*vcn_harvest_count)++;
- if (ip->number_instance == 0)
+ if (ip->instance_number == 0) {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
- else
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ } else {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ }
break;
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
break;
default:
break;
- }
- }
+ }
+ }
next_ip:
- ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
}
@@ -564,10 +608,15 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
case VCN_HWID:
(*vcn_harvest_count)++;
- if (harvest_info->list[i].number_instance == 0)
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
- else
- adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+ adev->jpeg.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+
+ adev->vcn.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ adev->jpeg.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
break;
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
@@ -577,6 +626,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
1 << (le16_to_cpu(harvest_info->list[i].number_instance));
(*umc_harvest_count)++;
break;
+ case GC_HWID:
+ adev->gfx.xcc_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case SDMA0_HWID:
+ adev->sdma.sdma_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
default:
break;
}
@@ -836,9 +893,40 @@ static void ip_disc_release(struct kobject *kobj)
kfree(ip_top);
}
+static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
+ uint16_t hw_id, uint8_t inst)
+{
+ uint8_t harvest = 0;
+
+ /* Until a uniform way is figured, get mask based on hwid */
+ switch (hw_id) {
+ case VCN_HWID:
+ harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
+ break;
+ case DMU_HWID:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
+ harvest = 0x1;
+ break;
+ case UMC_HWID:
+ /* TODO: It needs another parsing; for now, ignore.*/
+ break;
+ case GC_HWID:
+ harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
+ break;
+ case SDMA0_HWID:
+ harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
+ break;
+ default:
+ break;
+ }
+
+ return harvest;
+}
+
static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_die_entry *ip_die_entry,
- const size_t _ip_offset, const int num_ips)
+ const size_t _ip_offset, const int num_ips,
+ bool reg_base_64)
{
int ii, jj, kk, res;
@@ -852,10 +940,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
size_t ip_offset = _ip_offset;
for (jj = 0; jj < num_ips; jj++) {
- struct ip *ip;
+ struct ip_v4 *ip;
struct ip_hw_instance *ip_hw_instance;
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
if (amdgpu_discovery_validate_ip(ip) ||
le16_to_cpu(ip->hw_id) != ii)
goto next_ip;
@@ -903,22 +991,35 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
return -ENOMEM;
}
ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
- ip_hw_instance->num_instance = ip->number_instance;
+ ip_hw_instance->num_instance = ip->instance_number;
ip_hw_instance->major = ip->major;
ip_hw_instance->minor = ip->minor;
ip_hw_instance->revision = ip->revision;
- ip_hw_instance->harvest = ip->harvest;
+ ip_hw_instance->harvest =
+ amdgpu_discovery_get_harvest_info(
+ adev, ip_hw_instance->hw_id,
+ ip_hw_instance->num_instance);
ip_hw_instance->num_base_addresses = ip->num_base_address;
- for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++)
- ip_hw_instance->base_addr[kk] = ip->base_address[kk];
+ for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
+ if (reg_base_64)
+ ip_hw_instance->base_addr[kk] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
+ else
+ ip_hw_instance->base_addr[kk] = ip->base_address[kk];
+ }
kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
res = kobject_add(&ip_hw_instance->kobj, NULL,
"%d", ip_hw_instance->num_instance);
next_ip:
- ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ if (reg_base_64)
+ ip_offset += struct_size(ip, base_address_64,
+ ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
}
}
@@ -972,7 +1073,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
return res;
}
- amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips);
+ amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
}
return 0;
@@ -983,6 +1084,9 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
struct kset *die_kset;
int res, ii;
+ if (!adev->mman.discovery_bin)
+ return -EINVAL;
+
adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
if (!adev->ip_top)
return -ENOMEM;
@@ -1082,7 +1186,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
- struct ip *ip;
+ struct ip_v4 *ip;
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
@@ -1098,6 +1202,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
return r;
}
+ adev->gfx.xcc_mask = 0;
+ adev->sdma.sdma_mask = 0;
+ adev->vcn.inst_mask = 0;
+ adev->jpeg.inst_mask = 0;
bhdr = (struct binary_header *)adev->mman.discovery_bin;
ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
@@ -1121,7 +1229,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
if (amdgpu_discovery_validate_ip(ip))
goto next_ip;
@@ -1131,7 +1239,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
hw_id_names[le16_to_cpu(ip->hw_id)],
le16_to_cpu(ip->hw_id),
- ip->number_instance,
+ ip->instance_number,
ip->major, ip->minor,
ip->revision);
@@ -1145,23 +1253,33 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
ip->revision & 0xc0;
ip->revision &= ~0xc0;
- if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES)
+ if (adev->vcn.num_vcn_inst <
+ AMDGPU_MAX_VCN_INSTANCES) {
adev->vcn.num_vcn_inst++;
- else
+ adev->vcn.inst_mask |=
+ (1U << ip->instance_number);
+ adev->jpeg.inst_mask |=
+ (1U << ip->instance_number);
+ } else {
dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
adev->vcn.num_vcn_inst + 1,
AMDGPU_MAX_VCN_INSTANCES);
+ }
}
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
- if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES)
+ if (adev->sdma.num_instances <
+ AMDGPU_MAX_SDMA_INSTANCES) {
adev->sdma.num_instances++;
- else
+ adev->sdma.sdma_mask |=
+ (1U << ip->instance_number);
+ } else {
dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
adev->sdma.num_instances + 1,
AMDGPU_MAX_SDMA_INSTANCES);
+ }
}
if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
@@ -1169,20 +1287,38 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->umc.node_inst_num++;
}
+ if (le16_to_cpu(ip->hw_id) == GC_HWID)
+ adev->gfx.xcc_mask |=
+ (1U << ip->instance_number);
+
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
* so that we don't need to convert them when accessing adev->reg_offset.
*/
- ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
+ if (ihdr->base_addr_64_bit)
+ /* Truncate the 64bit base address from ip discovery
+ * and only store lower 32bit ip base in reg_offset[].
+ * Bits > 32 follows ASIC specific format, thus just
+ * discard them and handle it within specific ASIC.
+ * By this way reg_offset[] and related helpers can
+ * stay unchanged.
+ * The base address is in dwords, thus clear the
+ * highest 2 bits to store.
+ */
+ ip->base_address[k] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
+ else
+ ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
}
for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
- if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
+ if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
+ hw_id_map[hw_ip] != 0) {
DRM_DEBUG("set register base offset for %s\n",
hw_id_names[le16_to_cpu(ip->hw_id)]);
- adev->reg_offset[hw_ip][ip->number_instance] =
+ adev->reg_offset[hw_ip][ip->instance_number] =
ip->base_address;
/* Instance support is somewhat inconsistent.
* SDMA is a good example. Sienna cichlid has 4 total
@@ -1193,69 +1329,22 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
* example. On most chips there are multiple instances
* with the same HWID.
*/
- adev->ip_versions[hw_ip][ip->number_instance] =
+ adev->ip_versions[hw_ip][ip->instance_number] =
IP_VERSION(ip->major, ip->minor, ip->revision);
}
}
next_ip:
- ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
- amdgpu_discovery_sysfs_init(adev);
-
return 0;
}
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
- int *major, int *minor, int *revision)
-{
- struct binary_header *bhdr;
- struct ip_discovery_header *ihdr;
- struct die_header *dhdr;
- struct ip *ip;
- uint16_t die_offset;
- uint16_t ip_offset;
- uint16_t num_dies;
- uint16_t num_ips;
- int i, j;
-
- if (!adev->mman.discovery_bin) {
- DRM_ERROR("ip discovery uninitialized\n");
- return -EINVAL;
- }
-
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
- num_dies = le16_to_cpu(ihdr->num_dies);
-
- for (i = 0; i < num_dies; i++) {
- die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
- num_ips = le16_to_cpu(dhdr->num_ips);
- ip_offset = die_offset + sizeof(*dhdr);
-
- for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
-
- if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) {
- if (major)
- *major = ip->major;
- if (minor)
- *minor = ip->minor;
- if (revision)
- *revision = ip->revision;
- return 0;
- }
- ip_offset += struct_size(ip, base_address, ip->num_base_address);
- }
- }
-
- return -EINVAL;
-}
-
static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
int vcn_harvest_count = 0;
@@ -1266,7 +1355,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
* so read harvest bit per IP data structure to set
* harvest configuration.
*/
- if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0)) {
+ if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) &&
+ adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) {
if ((adev->pdev->device == 0x731E &&
(adev->pdev->revision == 0xC6 ||
adev->pdev->revision == 0xC7)) ||
@@ -1425,6 +1515,7 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
mall_size += mall_size_per_umc;
}
adev->gmc.mall_size = mall_size;
+ adev->gmc.m_half_use = half_use;
break;
default:
dev_err(adev->dev,
@@ -1706,6 +1797,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
@@ -1804,6 +1896,11 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
break;
+ case IP_VERSION(9, 4, 3):
+ if (!amdgpu_exp_hw_support)
+ return -EINVAL;
+ amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
+ break;
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 1):
@@ -1939,7 +2036,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 0, 2):
- case IP_VERSION(3, 0, 192):
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
@@ -1952,7 +2048,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 0, 4):
amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
- return 0;
+ break;
+ case IP_VERSION(4, 0, 3):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@@ -2000,6 +2100,17 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
return 0;
}
+static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ aqua_vanjaram_init_soc_config(adev);
+ break;
+ default:
+ break;
+ }
+}
+
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
@@ -2177,6 +2288,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
}
+ amdgpu_discovery_init_soc_config(adev);
+ amdgpu_discovery_sysfs_init(adev);
+
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
@@ -2387,6 +2501,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 2):
adev->smuio.funcs = &smuio_v13_0_funcs;
break;
+ case IP_VERSION(13, 0, 3):
+ adev->smuio.funcs = &smuio_v13_0_3_funcs;
+ if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
+ adev->flags |= AMD_IS_APU;
+ }
+ break;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 8):
adev->smuio.funcs = &smuio_v13_0_6_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 8563dd4a7dc2..3a2f347bd50d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,12 +24,10 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
-#define DISCOVERY_TMR_SIZE (4 << 10)
+#define DISCOVERY_TMR_SIZE (8 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
void amdgpu_discovery_fini(struct amdgpu_device *adev);
-int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
- int *major, int *minor, int *revision);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d60fe7eb5579..b702f499f5fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -98,7 +98,7 @@ static void amdgpu_display_flip_callback(struct dma_fence *f,
static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
struct dma_fence **f)
{
- struct dma_fence *fence= *f;
+ struct dma_fence *fence = *f;
if (fence == NULL)
return false;
@@ -1252,21 +1252,21 @@ const struct drm_mode_config_funcs amdgpu_mode_funcs = {
.fb_create = amdgpu_display_user_framebuffer_create,
};
-static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] =
-{ { UNDERSCAN_OFF, "off" },
+static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
+ { UNDERSCAN_OFF, "off" },
{ UNDERSCAN_ON, "on" },
{ UNDERSCAN_AUTO, "auto" },
};
-static const struct drm_prop_enum_list amdgpu_audio_enum_list[] =
-{ { AMDGPU_AUDIO_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
+ { AMDGPU_AUDIO_DISABLE, "off" },
{ AMDGPU_AUDIO_ENABLE, "on" },
{ AMDGPU_AUDIO_AUTO, "auto" },
};
/* XXX support different dither options? spatial, temporal, both, etc. */
-static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
-{ { AMDGPU_FMT_DITHER_DISABLE, "off" },
+static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
+ { AMDGPU_FMT_DITHER_DISABLE, "off" },
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
};
@@ -1496,8 +1496,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
ret |= DRM_SCANOUTPOS_ACCURATE;
vbl_start = vbl & 0x1fff;
vbl_end = (vbl >> 16) & 0x1fff;
- }
- else {
+ } else {
/* No: Fake something reasonable which gives at least ok results. */
vbl_start = mode->crtc_vdisplay;
vbl_end = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 0c001bb8fc2b..12210598e5b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -149,7 +149,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
if (!bo->tbo.pin_count) {
/* move buffer into GTT or VRAM */
struct ttm_operation_ctx ctx = { false, false };
- unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
+ unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
attach->peer2peer) {
@@ -336,7 +336,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_CPU, flags,
- ttm_bo_type_sg, resv, &gobj);
+ ttm_bo_type_sg, resv, &gobj, 0);
if (ret)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 8fd11497faba..f637574644c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -59,7 +59,7 @@ struct amdgpu_doorbell_index {
uint32_t gfx_ring1;
uint32_t gfx_userqueue_start;
uint32_t gfx_userqueue_end;
- uint32_t sdma_engine[8];
+ uint32_t sdma_engine[16];
uint32_t mes_ring0;
uint32_t mes_ring1;
uint32_t ih;
@@ -86,6 +86,8 @@ struct amdgpu_doorbell_index {
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
+ /* Per xcc doorbell size for KIQ/KCQ */
+ uint32_t xcc_doorbell_range;
};
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
@@ -164,7 +166,15 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
- AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ /* kiq/kcq from second XCD. Max 8 XCDs */
+ AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190,
+ /* 8 compute rings per GC. Max to 0x1CE */
+ AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197,
+
+ /* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+ AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0,
+
+ AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
@@ -301,6 +311,36 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
+typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
+ /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */
+
+ /* KIQ/HIQ/DIQ */
+ AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
+ AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
+ AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
+ /* Compute: 0x08 ~ 0x20 */
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
+ AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
+
+ /* SDMA: 0x100 ~ 0x19F */
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
+ /* IH: 0x1A0 ~ 0x1AF */
+ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
+ /* VCN: 0x1B0 ~ 0x1D4 */
+ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
+ AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
+
+ AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
+ AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
+
+ AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4,
+ AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
+} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
+
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 393b6fb7a71d..3b711babd4e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -50,6 +50,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_reset.h"
+#include "../amdxcp/amdgpu_xcp_drv.h"
/*
* KMS wrapper.
@@ -110,9 +111,11 @@
* 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
* tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
* gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
+ * 3.53.0 - Support for GFX11 CP GFX shadowing
+ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 52
+#define KMS_DRIVER_MINOR 54
#define KMS_DRIVER_PATCHLEVEL 0
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -150,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu;
char *amdgpu_virtual_display;
-
+bool enforce_isolation;
/*
* OverDrive(bit 14) disabled by default
* GFX DCS(bit 19) disabled by default
@@ -191,6 +194,7 @@ int amdgpu_smartshift_bias;
int amdgpu_use_xgmi_p2p = 1;
int amdgpu_vcnfw_log;
int amdgpu_sg_display = -1; /* auto */
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -820,6 +824,13 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm
#endif
/**
+ * DOC: mtype_local (int)
+ */
+int amdgpu_mtype_local;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
+
+/**
* DOC: pcie_p2p (bool)
* Enable PCIe P2P (requires large-BAR). Default value: true (on)
*/
@@ -948,6 +959,28 @@ MODULE_PARM_DESC(smu_pptable_id,
"specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+/**
+ * DOC: partition_mode (int)
+ * Used to override the default SPX mode.
+ */
+MODULE_PARM_DESC(
+ user_partt_mode,
+ "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
+ 0 = AMDGPU_SPX_PARTITION_MODE, \
+ 1 = AMDGPU_DPX_PARTITION_MODE, \
+ 2 = AMDGPU_TPX_PARTITION_MODE, \
+ 3 = AMDGPU_QPX_PARTITION_MODE, \
+ 4 = AMDGPU_CPX_PARTITION_MODE)");
+module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
+
+
+/**
+ * DOC: enforce_isolation (bool)
+ * enforce process isolation between graphics and compute via using the same reserved vmid.
+ */
+module_param(enforce_isolation, bool, 0444);
+MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -1661,7 +1694,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
};
static const struct pci_device_id pciidlist[] = {
-#ifdef CONFIG_DRM_AMDGPU_SI
+#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -2018,6 +2051,11 @@ static const struct pci_device_id pciidlist[] = {
.class_mask = 0xffffff,
.driver_data = CHIP_IP_DISCOVERY },
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
{0, 0, 0}
};
@@ -2162,6 +2200,10 @@ retry_init:
goto err_pci;
}
+ ret = amdgpu_xcp_dev_register(adev, ent);
+ if (ret)
+ goto err_pci;
+
/*
* 1. don't init fbdev on hw without DCE
* 2. don't init fbdev if there are no connectors
@@ -2234,6 +2276,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
+ amdgpu_xcp_dev_unplug(adev);
drm_dev_unplug(dev);
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
@@ -2748,7 +2791,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.compat_ioctl = amdgpu_kms_compat_ioctl,
#endif
#ifdef CONFIG_PROC_FS
- .show_fdinfo = amdgpu_show_fdinfo
+ .show_fdinfo = drm_show_fdinfo,
#endif
};
@@ -2803,6 +2846,36 @@ static const struct drm_driver amdgpu_kms_driver = {
.dumb_map_offset = amdgpu_mode_dumb_mmap,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo,
+#endif
+
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_import = amdgpu_gem_prime_import,
+ .gem_prime_mmap = drm_gem_prime_mmap,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+const struct drm_driver amdgpu_partition_driver = {
+ .driver_features =
+ DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .lastclose = amdgpu_driver_lastclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
@@ -2884,9 +2957,11 @@ static void __exit amdgpu_exit(void)
amdgpu_amdkfd_fini();
pci_unregister_driver(&amdgpu_kms_pci_driver);
amdgpu_unregister_atpx_handler();
+ amdgpu_acpi_release();
amdgpu_sync_fini();
amdgpu_fence_slab_fini();
mmu_notifier_synchronize();
+ amdgpu_xcp_drv_release();
}
module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
index 8178323e4bef..5bc2cb661af7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -42,6 +42,8 @@
#define DRIVER_DESC "AMD GPU"
#define DRIVER_DATE "20150101"
+extern const struct drm_driver amdgpu_partition_driver;
+
long amdgpu_drm_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index 27a782a9dc72..3aaeed2d3562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -70,6 +70,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
amdgpu_encoder->active_device, amdgpu_encoder->devices,
@@ -165,12 +166,12 @@ void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
- unsigned hblank = native_mode->htotal - native_mode->hdisplay;
- unsigned vblank = native_mode->vtotal - native_mode->vdisplay;
- unsigned hover = native_mode->hsync_start - native_mode->hdisplay;
- unsigned vover = native_mode->vsync_start - native_mode->vdisplay;
- unsigned hsync_width = native_mode->hsync_end - native_mode->hsync_start;
- unsigned vsync_width = native_mode->vsync_end - native_mode->vsync_start;
+ unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
+ unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
+ unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
+ unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
+ unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
+ unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;
adjusted_mode->clock = native_mode->clock;
adjusted_mode->flags = native_mode->flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index c57252f004e8..13d7413d4ca3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -53,9 +53,8 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
};
-void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
{
- struct drm_file *file = f->private_data;
struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
@@ -87,31 +86,30 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
* ******************************************************************
*/
- seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
- seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
- seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
- seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
- seq_printf(m, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
- seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
- seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
- seq_printf(m, "amd-memory-visible-vram:\t%llu KiB\n",
+ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
+ drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name);
+ drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
+ drm_printf(p, "drm-client-id:\t%Lu\n", vm->immediate.fence_context);
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
+ drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
stats.visible_vram/1024UL);
- seq_printf(m, "amd-evicted-vram:\t%llu KiB\n",
+ drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
stats.evicted_vram/1024UL);
- seq_printf(m, "amd-evicted-visible-vram:\t%llu KiB\n",
+ drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
stats.evicted_visible_vram/1024UL);
- seq_printf(m, "amd-requested-vram:\t%llu KiB\n",
+ drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
stats.requested_vram/1024UL);
- seq_printf(m, "amd-requested-visible-vram:\t%llu KiB\n",
+ drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
stats.requested_visible_vram/1024UL);
- seq_printf(m, "amd-requested-gtt:\t%llu KiB\n",
+ drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
stats.requested_gtt/1024UL);
-
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
if (!usage[hw_ip])
continue;
- seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
+ drm_printf(p, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
ktime_to_ns(usage[hw_ip]));
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
index e86834bfea1d..0398f5a159ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
@@ -37,6 +37,6 @@
#include "amdgpu_ids.h"
uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
-void amdgpu_show_fdinfo(struct seq_file *m, struct file *f);
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index a7d250809da9..c694b41f6461 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -42,7 +42,6 @@
#include "amdgpu_reset.h"
/*
- * Fences
* Fences mark an event in the GPUs pipeline and are used
* for GPU/CPU synchronization. When the fence is written,
* it is expected that all buffers associated with that fence
@@ -140,7 +139,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* Returns 0 on success, -ENOMEM on failure.
*/
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
- unsigned flags)
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
@@ -174,11 +173,11 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
adev->fence_context + ring->idx, seq);
/* Against remove in amdgpu_job_{free, free_cb} */
dma_fence_get(fence);
- }
- else
+ } else {
dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
+ }
}
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
@@ -377,14 +376,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout)
{
- uint32_t seq;
-
- do {
- seq = amdgpu_fence_read(ring);
- udelay(5);
- timeout -= 5;
- } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
+ while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
return timeout > 0 ? timeout : 0;
}
/**
@@ -396,7 +392,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
* Returns the number of emitted fences on the ring. Used by the
* dynpm code to ring track activity.
*/
-unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
uint64_t emitted;
@@ -475,7 +471,7 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
*/
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
- unsigned irq_type)
+ unsigned int irq_type)
{
struct amdgpu_device *adev = ring->adev;
uint64_t index;
@@ -654,6 +650,7 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
@@ -695,6 +692,30 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
}
/**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code to all the fences pending on the ring.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&drv->lock, flags);
+ for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+ struct dma_fence *fence;
+
+ fence = rcu_dereference_protected(drv->fences[i],
+ lockdep_is_held(&drv->lock));
+ if (fence && !dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, error);
+ }
+ spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
* amdgpu_fence_driver_force_completion - force signal latest fence of ring
*
* @ring: fence of the ring to signal
@@ -702,6 +723,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
*/
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
+ amdgpu_fence_driver_set_error(ring, -ECANCELED);
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
amdgpu_fence_process(ring);
}
@@ -836,11 +858,12 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
@@ -914,6 +937,7 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
reset_work);
struct amdgpu_reset_context reset_context;
+
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 01cb89ffbd56..73b8cca35bab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -35,6 +35,7 @@
#endif
#include "amdgpu.h"
#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
/*
* GART
@@ -103,6 +104,142 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
}
/**
+ * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate system memory for GART page table for ASICs that don't have
+ * dedicated VRAM.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
+{
+ unsigned int order = get_order(adev->gart.table_size);
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+ struct amdgpu_bo *bo = NULL;
+ struct sg_table *sg = NULL;
+ struct amdgpu_bo_param bp;
+ dma_addr_t dma_addr;
+ struct page *p;
+ int ret;
+
+ if (adev->gart.bo != NULL)
+ return 0;
+
+ p = alloc_pages(gfp_flags, order);
+ if (!p)
+ return -ENOMEM;
+
+ /* If the hardware does not support UTCL2 snooping of the CPU caches
+ * then set_memory_wc() could be used as a workaround to mark the pages
+ * as write combine memory.
+ */
+ dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
+ __free_pages(p, order);
+ p = NULL;
+ return -EFAULT;
+ }
+
+ dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
+ /* Create SG table */
+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ if (ret)
+ goto error;
+
+ sg_dma_address(sg->sgl) = dma_addr;
+ sg->sgl->length = adev->gart.table_size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = adev->gart.table_size;
+#endif
+ /* Create SG BO */
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.flags = 0;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
+ goto error;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ WARN(ret, "Pinning the GART table failed");
+ if (ret)
+ goto error_resv;
+
+ adev->gart.bo = bo;
+ adev->gart.ptr = page_to_virt(p);
+ /* Make GART table accessible in VMID0 */
+ ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
+ if (ret)
+ amdgpu_gart_table_ram_free(adev);
+ amdgpu_bo_unreserve(bo);
+
+ return 0;
+
+error_resv:
+ amdgpu_bo_unreserve(bo);
+error:
+ amdgpu_bo_unref(&bo);
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ __free_pages(p, order);
+ return ret;
+}
+
+/**
+ * amdgpu_gart_table_ram_free - free gart page table system ram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the system memory used for the GART page tableon ASICs that don't
+ * have dedicated VRAM.
+ */
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
+{
+ unsigned int order = get_order(adev->gart.table_size);
+ struct sg_table *sg = adev->gart.bo->tbo.sg;
+ struct page *p;
+ int ret;
+
+ ret = amdgpu_bo_reserve(adev->gart.bo, false);
+ if (!ret) {
+ amdgpu_bo_unpin(adev->gart.bo);
+ amdgpu_bo_unreserve(adev->gart.bo);
+ }
+ amdgpu_bo_unref(&adev->gart.bo);
+ sg_free_table(sg);
+ kfree(sg);
+ p = virt_to_page(adev->gart.ptr);
+ __free_pages(p, order);
+
+ adev->gart.ptr = NULL;
+}
+
+/**
* amdgpu_gart_table_vram_alloc - allocate vram for gart page table
*
* @adev: amdgpu_device pointer
@@ -182,7 +319,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
mb();
amdgpu_device_flush_hdp(adev, NULL);
- for (i = 0; i < adev->num_vmhubs; i++)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
drm_dev_exit(idx);
@@ -264,7 +401,7 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
mb();
amdgpu_device_flush_hdp(adev, NULL);
- for (i = 0; i < adev->num_vmhubs; i++)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 8fea3e04e411..8283d682f543 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -51,6 +51,8 @@ struct amdgpu_gart {
uint64_t gart_pte_flags;
};
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 863cb668e000..74055cba3dc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -98,7 +98,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct dma_resv *resv,
- struct drm_gem_object **obj)
+ struct drm_gem_object **obj, int8_t xcp_id_plus1)
{
struct amdgpu_bo *bo;
struct amdgpu_bo_user *ubo;
@@ -116,6 +116,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bp.flags = flags;
bp.domain = initial_domain;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = xcp_id_plus1;
r = amdgpu_bo_create_user(adev, &bp, &ubo);
if (r)
@@ -336,7 +337,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
retry:
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
initial_domain,
- flags, ttm_bo_type_device, resv, &gobj);
+ flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
if (r && r != -ERESTARTSYS) {
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
@@ -379,6 +380,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_userptr *args = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object *gobj;
struct hmm_range *range;
struct amdgpu_bo *bo;
@@ -405,7 +407,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
- 0, ttm_bo_type_device, NULL, &gobj);
+ 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r)
return r;
@@ -908,6 +910,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct drm_gem_object *gobj;
uint32_t handle;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
@@ -931,7 +934,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
domain = amdgpu_bo_get_preferred_domain(adev,
amdgpu_display_supported_domains(adev, flags));
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
- ttm_bo_type_device, NULL, &gobj);
+ ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
if (r)
return -ENOMEM;
@@ -948,7 +951,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_device *dev = adev_to_drm(adev);
struct drm_file *file;
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 637bf51dbf06..f30264782ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -43,8 +43,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct dma_resv *resv,
- struct drm_gem_object **obj);
-
+ struct drm_gem_object **obj, int8_t xcp_id_plus1);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index f3f541ba0aca..a33d4bc34cee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -28,6 +28,7 @@
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -63,10 +64,10 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
}
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
- int mec, int pipe, int queue)
+ int xcc_id, int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
- adev->gfx.mec.queue_bitmap);
+ adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
@@ -204,29 +205,38 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe;
+ int i, j, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe,
adev->gfx.num_compute_rings);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
if (multipipe_policy) {
- /* policy: make queues evenly cross all pipes on MEC1 only */
- for (i = 0; i < max_queues_per_mec; i++) {
- pipe = i % adev->gfx.mec.num_pipe_per_mec;
- queue = (i / adev->gfx.mec.num_pipe_per_mec) %
- adev->gfx.mec.num_queue_per_pipe;
-
- set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
- adev->gfx.mec.queue_bitmap);
+ /* policy: make queues evenly cross all pipes on MEC1 only
+ * for multiple xcc, just use the original policy for simplicity */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
}
} else {
/* policy: amdgpu owns all queues in the given pipe */
- for (i = 0; i < max_queues_per_mec; ++i)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; ++i)
+ set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
}
- dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ for (j = 0; j < num_xcc; j++) {
+ dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+ bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ }
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
@@ -258,7 +268,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
+ struct amdgpu_ring *ring, int xcc_id)
{
int queue_bit;
int mec, pipe, queue;
@@ -268,7 +278,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
* adev->gfx.mec.num_queue_per_pipe;
while (--queue_bit >= 0) {
- if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+ if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue;
amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
@@ -294,9 +304,9 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq)
+ struct amdgpu_irq_src *irq, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
int r = 0;
spin_lock_init(&kiq->ring_lock);
@@ -304,16 +314,20 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.kiq;
- ring->vm_hub = AMDGPU_GFXHUB_0;
-
- r = amdgpu_gfx_kiq_acquire(adev, ring);
+ ring->xcc_id = xcc_id;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range)
+ << 1;
+
+ r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r)
return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true;
- sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+ sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
@@ -327,19 +341,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
amdgpu_ring_fini(ring);
}
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size)
+ unsigned hpd_size, int xcc_id)
{
int r;
u32 *hpd;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
@@ -362,13 +376,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size)
+ unsigned mqd_size, int xcc_id)
{
- struct amdgpu_ring *ring = NULL;
- int r, i;
+ int r, i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+ u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+ /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+ domain |= AMDGPU_GEM_DOMAIN_VRAM;
/* create MQD for KIQ */
- ring = &adev->gfx.kiq.ring;
if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
@@ -387,8 +406,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
}
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
+ kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+ if (!kiq->mqd_backup)
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
@@ -398,13 +417,14 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring = &adev->gfx.gfx_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
+ ring->mqd_size = mqd_size;
/* prepare MQD backup */
adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.me.mqd_backup[i])
@@ -415,19 +435,21 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
/* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ domain, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
+ ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->gfx.mec.mqd_backup[i])
+ adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[j])
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
}
@@ -435,10 +457,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_ring *ring = NULL;
- int i;
+ int i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
@@ -451,43 +474,81 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- kfree(adev->gfx.mec.mqd_backup[i]);
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ kfree(adev->gfx.mec.mqd_backup[j]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
- ring = &adev->gfx.kiq.ring;
- kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
+ ring = &kiq->ring;
+ kfree(kiq->mqd_backup);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
int i, r = 0;
+ int j;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings)) {
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&kiq->ring_lock);
return -ENOMEM;
}
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0);
+ }
- if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
+ if (kiq_ring->sched.ready && !adev->job_hang)
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&kiq->ring_lock);
+
+ return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_gfx_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
return r;
}
@@ -505,18 +566,18 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
return set_resource_bit;
}
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
uint64_t queue_mask = 0;
- int r, i;
+ int r, i, j;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
@@ -532,13 +593,15 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue);
- spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&kiq->ring_lock);
return r;
}
@@ -546,11 +609,51 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
queue_mask = ~0ULL;
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.compute_ring[j]);
+ }
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ DRM_ERROR("KCQ enable failed\n");
+
+ return r;
+}
+
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int r, i, j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ spin_lock(&kiq->ring_lock);
+ /* No need to map kcq on the slave */
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_gfx_rings);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j]);
+ }
+ }
+
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
if (r)
DRM_ERROR("KCQ enable failed\n");
@@ -785,12 +888,31 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
}
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id))
+{
+ int i;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ if (err_data) {
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+ }
+
+ for_each_inst(i, xcc_mask)
+ func(adev, ras_error_status, i);
+}
+
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq, reg_val_offs = 0, value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
if (amdgpu_device_skip_hw_access(adev))
@@ -858,7 +980,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg);
@@ -1062,3 +1184,125 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
}
}
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+ return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+ adev->gfx.num_xcc_per_xcp : 1));
+}
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int mode;
+
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+
+ return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_gfx_partition mode;
+ int ret = 0, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (num_xcc % 2 != 0)
+ return -EINVAL;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+ /*
+ * DPX mode needs AIDs to be in multiple of 2.
+ * Each AID connects 2 XCCs.
+ */
+ if (num_xcc%4)
+ return -EINVAL;
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+ if (num_xcc != 6)
+ return -EINVAL;
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+ if (num_xcc != 8)
+ return -EINVAL;
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ } else {
+ return -EINVAL;
+ }
+
+ ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ char *supported_partition;
+
+ /* TBD */
+ switch (NUM_XCC(adev->gfx.xcc_mask)) {
+ case 8:
+ supported_partition = "SPX, DPX, QPX, CPX";
+ break;
+ case 6:
+ supported_partition = "SPX, TPX, CPX";
+ break;
+ case 4:
+ supported_partition = "SPX, DPX, CPX";
+ break;
+ /* this seems only existing in emulation phase */
+ case 2:
+ supported_partition = "SPX, CPX";
+ break;
+ default:
+ supported_partition = "Not supported";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s\n", supported_partition);
+}
+
+static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
+ amdgpu_gfx_get_current_compute_partition,
+ amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, S_IRUGO,
+ amdgpu_gfx_get_available_compute_partition, NULL);
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+ if (r)
+ return r;
+
+ r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
+
+ return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+ device_remove_file(adev->dev, &dev_attr_available_compute_partition);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index bfabea76d166..ce0f7a8ad4b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -61,7 +61,42 @@ enum amdgpu_gfx_partition {
AMDGPU_TPX_PARTITION_MODE = 2,
AMDGPU_QPX_PARTITION_MODE = 3,
AMDGPU_CPX_PARTITION_MODE = 4,
- AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE,
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
+ /* Automatically choose the right mode */
+ AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
+};
+
+#define NUM_XCC(x) hweight16(x)
+
+enum amdgpu_pkg_type {
+ AMDGPU_PKG_TYPE_APU = 2,
+ AMDGPU_PKG_TYPE_UNKNOWN,
+};
+
+enum amdgpu_gfx_ras_mem_id_type {
+ AMDGPU_GFX_CP_MEM = 0,
+ AMDGPU_GFX_GCEA_MEM,
+ AMDGPU_GFX_GC_CANE_MEM,
+ AMDGPU_GFX_GCUTCL2_MEM,
+ AMDGPU_GFX_GDS_MEM,
+ AMDGPU_GFX_LDS_MEM,
+ AMDGPU_GFX_RLC_MEM,
+ AMDGPU_GFX_SP_MEM,
+ AMDGPU_GFX_SPI_MEM,
+ AMDGPU_GFX_SQC_MEM,
+ AMDGPU_GFX_SQ_MEM,
+ AMDGPU_GFX_TA_MEM,
+ AMDGPU_GFX_TCC_MEM,
+ AMDGPU_GFX_TCA_MEM,
+ AMDGPU_GFX_TCI_MEM,
+ AMDGPU_GFX_TCP_MEM,
+ AMDGPU_GFX_TD_MEM,
+ AMDGPU_GFX_TCX_MEM,
+ AMDGPU_GFX_ATC_L2_MEM,
+ AMDGPU_GFX_UTCL2_MEM,
+ AMDGPU_GFX_VML2_MEM,
+ AMDGPU_GFX_VML2_WALKER_MEM,
+ AMDGPU_GFX_MEM_TYPE_NUM
};
struct amdgpu_mec {
@@ -75,8 +110,10 @@ struct amdgpu_mec {
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+};
+struct amdgpu_mec_bitmap {
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
@@ -120,6 +157,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
+ void *mqd_backup;
};
/*
@@ -230,23 +268,37 @@ struct amdgpu_gfx_ras {
struct amdgpu_iv_entry *entry);
};
+struct amdgpu_gfx_shadow_info {
+ u32 shadow_size;
+ u32 shadow_alignment;
+ u32 csa_size;
+ u32 csa_alignment;
+};
+
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
- void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
+ u32 sh_num, u32 instance, int xcc_id);
+ void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t *dst, int *no_fields);
- void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
+ void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread, uint32_t start,
uint32_t size, uint32_t *dst);
- void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
+ void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start, uint32_t size,
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
- u32 queue, u32 vmid);
+ u32 queue, u32 vmid, u32 xcc_id);
void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+ int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info);
+ enum amdgpu_gfx_partition
+ (*query_partition_mode)(struct amdgpu_device *adev);
+ int (*switch_partition_mode)(struct amdgpu_device *adev,
+ int num_xccs_per_xcp);
+ int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
};
struct sq_work {
@@ -296,7 +348,8 @@ struct amdgpu_gfx {
struct amdgpu_ce ce;
struct amdgpu_me me;
struct amdgpu_mec mec;
- struct amdgpu_kiq kiq;
+ struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
struct amdgpu_imu imu;
bool rs64_enable; /* firmware format */
const struct firmware *me_fw; /* ME firmware */
@@ -376,15 +429,31 @@ struct amdgpu_gfx {
struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux muxer;
- enum amdgpu_gfx_partition partition_mode;
- uint32_t num_xcd;
+ bool cp_gfx_shadow; /* for gfx11 */
+
+ uint16_t xcc_mask;
uint32_t num_xcc_per_xcp;
+ struct mutex partition_mutex;
};
+struct amdgpu_gfx_ras_reg_entry {
+ struct amdgpu_ras_err_status_reg_entry reg_entry;
+ enum amdgpu_gfx_ras_mem_id_type mem_id_type;
+ uint32_t se_num;
+};
+
+struct amdgpu_gfx_ras_mem_id_entry {
+ const struct amdgpu_ras_memory_id_entry *mem_id_ent;
+ uint32_t size;
+};
+
+#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
+
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
-#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
-#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
@@ -404,19 +473,21 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+ struct amdgpu_irq_src *irq, int xcc_id);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size);
+ unsigned hpd_size, int xcc_id);
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size);
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev);
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev);
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev);
+ unsigned mqd_size, int xcc_id);
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
@@ -425,8 +496,8 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
-bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
- int pipe, int queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
+ int mec, int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
@@ -458,4 +529,33 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id)
int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id));
+
+static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
+{
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return "SPX";
+ case AMDGPU_DPX_PARTITION_MODE:
+ return "DPX";
+ case AMDGPU_TPX_PARTITION_MODE:
+ return "TPX";
+ case AMDGPU_QPX_PARTITION_MODE:
+ return "QPX";
+ case AMDGPU_CPX_PARTITION_MODE:
+ return "CPX";
+ default:
+ return "UNKNOWN";
+ }
+
+ return "UNKNOWN";
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 95b0f984acbf..d78bd9732543 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -534,22 +534,21 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
* subject to change when ring number changes
* Engine 17: Gart flushes
*/
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
- {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
- GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
- if (adev->enable_mes) {
+ /* init the vm inv eng for all vmhubs */
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
/* reserve engine 5 for firmware */
- for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++)
- vm_inv_engs[vmhub] &= ~(1 << 5);
+ if (adev->enable_mes)
+ vm_inv_engs[i] &= ~(1 << 5);
}
for (i = 0; i < adev->num_rings; ++i) {
@@ -671,7 +670,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
- tmp = (hub_type == AMDGPU_GFXHUB_0) ?
+ tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
RREG32_SOC15_IP(GC, reg) :
RREG32_SOC15_IP(MMHUB, reg);
@@ -680,7 +679,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
else
tmp &= ~hub->vm_cntx_cntl_vm_fault;
- (hub_type == AMDGPU_GFXHUB_0) ?
+ (hub_type == AMDGPU_GFXHUB(0)) ?
WREG32_SOC15_IP(GC, reg, tmp) :
WREG32_SOC15_IP(MMHUB, reg, tmp);
}
@@ -893,3 +892,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
return 0;
}
+
+static ssize_t current_memory_partition_show(
+ struct device *dev, struct device_attribute *addr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ switch (mode) {
+ case AMDGPU_NPS1_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS1\n");
+ case AMDGPU_NPS2_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS2\n");
+ case AMDGPU_NPS3_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS3\n");
+ case AMDGPU_NPS4_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS4\n");
+ case AMDGPU_NPS6_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS6\n");
+ case AMDGPU_NPS8_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS8\n");
+ default:
+ return sysfs_emit(buf, "UNKNOWN\n");
+ }
+
+ return sysfs_emit(buf, "UNKNOWN\n");
+}
+
+static DEVICE_ATTR_RO(current_memory_partition);
+
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return 0;
+
+ return device_create_file(adev->dev,
+ &dev_attr_current_memory_partition);
+}
+
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6d105d7fb98b..56d73fade568 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -63,6 +63,16 @@
struct firmware;
+enum amdgpu_memory_partition {
+ UNKNOWN_MEMORY_PARTITION_MODE = 0,
+ AMDGPU_NPS1_PARTITION_MODE = 1,
+ AMDGPU_NPS2_PARTITION_MODE = 2,
+ AMDGPU_NPS3_PARTITION_MODE = 3,
+ AMDGPU_NPS4_PARTITION_MODE = 4,
+ AMDGPU_NPS6_PARTITION_MODE = 6,
+ AMDGPU_NPS8_PARTITION_MODE = 8,
+};
+
/*
* GMC page fault information
*/
@@ -119,7 +129,8 @@ struct amdgpu_gmc_funcs {
uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via pasid */
int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type, bool all_hub);
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -137,8 +148,15 @@ struct amdgpu_gmc_funcs {
void (*get_vm_pte)(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
uint64_t *flags);
+ /* override per-page pte flags */
+ void (*override_vm_pte_flags)(struct amdgpu_device *dev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+
+ enum amdgpu_memory_partition (*query_mem_partition_mode)(
+ struct amdgpu_device *adev);
};
struct amdgpu_xgmi_ras {
@@ -164,6 +182,21 @@ struct amdgpu_xgmi {
struct amdgpu_xgmi_ras *ras;
};
+struct amdgpu_mem_partition_info {
+ union {
+ struct {
+ uint32_t fpfn;
+ uint32_t lpfn;
+ } range;
+ struct {
+ int node;
+ } numa;
+ };
+ uint64_t size;
+};
+
+#define INVALID_PFN -1
+
struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to
* map FB). This is different compared to the agp/
@@ -250,7 +283,10 @@ struct amdgpu_gmc {
uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
bool tmz_enabled;
+ bool is_app_apu;
+ struct amdgpu_mem_partition_info *mem_partitions;
+ uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi;
@@ -265,6 +301,8 @@ struct amdgpu_gmc {
/* MALL size */
u64 mall_size;
+ uint32_t m_half_use;
+
/* number of UMC instances */
int num_umc;
/* mode2 save restore */
@@ -296,14 +334,17 @@ struct amdgpu_gmc {
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
- ((adev), (pasid), (type), (allhub)))
+ ((adev), (pasid), (type), (allhub), (inst)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
+ (adev)->gmc.gmc_funcs->override_vm_pte_flags \
+ ((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
/**
@@ -373,4 +414,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 2dadcfe43d03..081267161d40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
hmm_range->start, hmm_range->end);
- /* Assuming 512MB takes maxmium 1 second to fault page address */
- timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL);
+ /* Assuming 128MB takes maximum 1 second to fault page address */
+ timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL);
timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
timeout = jiffies + msecs_to_jiffies(timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4ff348e10e4d..ebeddc9a37e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
unsigned fence_flags = 0;
- bool secure;
+ bool secure, init_shadow;
+ u64 shadow_va, csa_va, gds_va;
+ int vmid = AMDGPU_JOB_GET_VMID(job);
unsigned i;
int r = 0;
@@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
vm = job->vm;
fence_ctx = job->base.s_fence ?
job->base.s_fence->scheduled.context : 0;
+ shadow_va = job->shadow_va;
+ csa_va = job->csa_va;
+ gds_va = job->gds_va;
+ init_shadow = job->init_shadow;
} else {
vm = NULL;
fence_ctx = 0;
+ shadow_va = 0;
+ csa_va = 0;
+ gds_va = 0;
+ init_shadow = false;
}
if (!ring->sched.ready && !ring->is_mes_queue) {
@@ -212,7 +222,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
amdgpu_ring_ib_begin(ring);
- if (job && ring->funcs->init_cond_exec)
+
+ if (ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+ init_shadow, vmid);
+
+ if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
amdgpu_device_flush_hdp(adev, ring);
@@ -263,6 +278,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
+ if (ring->funcs->emit_gfx_shadow) {
+ amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+
+ if (ring->funcs->init_cond_exec) {
+ unsigned ce_offset = ~0;
+
+ ce_offset = amdgpu_ring_init_cond_exec(ring);
+ if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
+ amdgpu_ring_patch_cond_exec(ring, ce_offset);
+ }
+ }
+
r = amdgpu_fence_emit(ring, f, job, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
@@ -436,7 +463,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
seq_printf(m, "--------------------- DELAYED --------------------- \n");
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index c991ca0b7a1c..ff1ea99292fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle)
goto error;
- if (vm->reserved_vmid[vmhub]) {
+ if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
@@ -460,14 +460,11 @@ error:
}
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
- if (vm->reserved_vmid[vmhub])
- goto unlock;
++id_mgr->reserved_use_count;
if (!id_mgr->reserved) {
@@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
list_del_init(&id->list);
id_mgr->reserved = id;
}
- vm->reserved_vmid[vmhub] = true;
-unlock:
mutex_unlock(&id_mgr->lock);
return 0;
}
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
- if (vm->reserved_vmid[vmhub] &&
- !--id_mgr->reserved_use_count) {
+ if (!--id_mgr->reserved_use_count) {
/* give the reserved ID back to normal round robin */
list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
id_mgr->reserved = NULL;
}
- vm->reserved_vmid[vmhub] = false;
+
mutex_unlock(&id_mgr->lock);
}
@@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
+ /* alloc a default reserved vmid to enforce isolation */
+ if (enforce_isolation)
+ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index d1cc09b45da4..fa8c42c83d5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- unsigned vmhub);
+ unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- unsigned vmhub);
+ unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_job *job, struct dma_fence **fence);
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 1d5af50331e4..fceb3b384955 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -270,7 +270,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
entry->timestamp_src = dw[2] >> 31;
entry->pasid = dw[3] & 0xffff;
- entry->pasid_src = dw[3] >> 31;
+ entry->node_id = (dw[3] >> 16) & 0xff;
entry->src_data[0] = dw[4];
entry->src_data[1] = dw[5];
entry->src_data[2] = dw[6];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index fafebec5b7b6..5273decc5753 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -99,6 +99,21 @@ const char *soc15_ih_clientid_name[] = {
"MP1"
};
+const int node_id_to_phys_map[NODEID_MAX] = {
+ [AID0_NODEID] = 0,
+ [XCD0_NODEID] = 0,
+ [XCD1_NODEID] = 1,
+ [AID1_NODEID] = 1,
+ [XCD2_NODEID] = 2,
+ [XCD3_NODEID] = 3,
+ [AID2_NODEID] = 2,
+ [XCD4_NODEID] = 4,
+ [XCD5_NODEID] = 5,
+ [AID3_NODEID] = 3,
+ [XCD6_NODEID] = 6,
+ [XCD7_NODEID] = 7,
+};
+
/**
* amdgpu_irq_disable_all - disable *all* interrupts
*
@@ -109,7 +124,7 @@ const char *soc15_ih_clientid_name[] = {
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
- unsigned i, j, k;
+ unsigned int i, j, k;
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
@@ -124,7 +139,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
continue;
for (k = 0; k < src->num_types; ++k) {
- atomic_set(&src->enabled_types[k], 0);
r = src->funcs->set(adev, src, k,
AMDGPU_IRQ_STATE_DISABLE);
if (r)
@@ -268,11 +282,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
int nvec = pci_msix_vec_count(adev->pdev);
unsigned int flags;
- if (nvec <= 0) {
+ if (nvec <= 0)
flags = PCI_IRQ_MSI;
- } else {
+ else
flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
- }
+
/* we only need one vector */
nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
if (nvec > 0) {
@@ -331,7 +345,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
*/
void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
{
- unsigned i, j;
+ unsigned int i, j;
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
@@ -365,7 +379,7 @@ void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
* 0 on success or error code otherwise
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev,
- unsigned client_id, unsigned src_id,
+ unsigned int client_id, unsigned int src_id,
struct amdgpu_irq_src *source)
{
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
@@ -417,7 +431,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
{
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
- unsigned client_id, src_id;
+ unsigned int client_id, src_id;
struct amdgpu_irq_src *src;
bool handled = false;
int r;
@@ -453,7 +467,8 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
handled = true;
} else {
- DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
+ DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n",
+ src_id, client_id);
}
/* Send it to amdkfd as well if it isn't already handled */
@@ -492,7 +507,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
* Updates interrupt state for the specific source (all ASICs).
*/
int amdgpu_irq_update(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src, unsigned type)
+ struct amdgpu_irq_src *src, unsigned int type)
{
unsigned long irqflags;
enum amdgpu_interrupt_state state;
@@ -501,7 +516,8 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags);
/* We need to determine after taking the lock, otherwise
- we might disable just enabled interrupts again */
+ * we might disable just enabled interrupts again
+ */
if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE;
else
@@ -555,7 +571,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* 0 on success or error code otherwise
*/
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
if (!adev->irq.installed)
return -ENOENT;
@@ -585,7 +601,7 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* 0 on success or error code otherwise
*/
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
if (!adev->irq.installed)
return -ENOENT;
@@ -619,7 +635,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* invalid parameters
*/
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
- unsigned type)
+ unsigned int type)
{
if (!adev->irq.installed)
return false;
@@ -732,7 +748,7 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
* Returns:
* Linux IRQ
*/
-unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
+unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
{
adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index be243adf3e65..04c0b4fa17a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -53,7 +53,7 @@ struct amdgpu_iv_entry {
uint64_t timestamp;
unsigned timestamp_src;
unsigned pasid;
- unsigned pasid_src;
+ unsigned node_id;
unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
};
@@ -102,6 +102,24 @@ struct amdgpu_irq {
bool retry_cam_enabled;
};
+enum interrupt_node_id_per_aid {
+ AID0_NODEID = 0,
+ XCD0_NODEID = 1,
+ XCD1_NODEID = 2,
+ AID1_NODEID = 4,
+ XCD2_NODEID = 5,
+ XCD3_NODEID = 6,
+ AID2_NODEID = 8,
+ XCD4_NODEID = 9,
+ XCD5_NODEID = 10,
+ AID3_NODEID = 12,
+ XCD6_NODEID = 13,
+ XCD7_NODEID = 14,
+ NODEID_MAX,
+};
+
+extern const int node_id_to_phys_map[NODEID_MAX];
+
void amdgpu_irq_disable_all(struct amdgpu_device *adev);
int amdgpu_irq_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c3d9d75143f4..78476bc75b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -65,6 +65,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
ti.process_name, ti.tgid, ti.task_name, ti.pid);
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+
if (amdgpu_device_should_recover_gpu(ring->adev)) {
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
@@ -107,7 +109,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
(*job)->vm = vm;
amdgpu_sync_create(&(*job)->explicit_sync);
- (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ (*job)->generation = amdgpu_vm_generation(adev, vm);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
if (!entity)
@@ -256,16 +258,27 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
struct dma_fence *fence = NULL;
int r;
+ /* Ignore soft recovered fences here */
+ r = drm_sched_entity_error(s_entity);
+ if (r && r != -ENODATA)
+ goto error;
+
if (!fence && job->gang_submit)
fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
while (!fence && job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
- if (r)
+ if (r) {
DRM_ERROR("Error getting VM ID (%d)\n", r);
+ goto error;
+ }
}
return fence;
+
+error:
+ dma_fence_set_error(&job->base.s_fence->finished, r);
+ return NULL;
}
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
@@ -282,7 +295,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
trace_amdgpu_sched_run_job(job);
/* Skip job if VRAM is lost and never resubmit gangs */
- if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter) ||
+ if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
(job->job_run_counter && job->gang_submit))
dma_fence_set_error(finished, -ECANCELED);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 52f2e313ea17..a963a25ddd62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -61,12 +61,18 @@ struct amdgpu_job {
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
- uint32_t vram_lost_counter;
+ uint64_t generation;
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_t shadow_va;
+ uint64_t csa_va;
+ uint64_t gds_va;
+ bool init_shadow;
+
/* job_run_counter >= 1 means a resubmit job */
uint32_t job_run_counter;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 4fa019c8aefc..3add4b4f0667 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -45,13 +45,14 @@ int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
{
- int i;
+ int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
- amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
}
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
@@ -76,13 +77,14 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, jpeg.idle_work.work);
unsigned int fences = 0;
- unsigned int i;
+ unsigned int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
- fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
}
if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
@@ -122,18 +124,21 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
if (amdgpu_sriov_vf(adev))
return 0;
- WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
- amdgpu_ring_write(ring, 0xDEADBEEF);
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
+ /* Add a read register to make sure the write register is executed. */
+ RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
+ amdgpu_ring_write(ring, 0xABADCAFE);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xABADCAFE)
break;
udelay(1);
}
@@ -161,8 +166,7 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0];
- ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0,
- PACKETJ_TYPE0);
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@@ -208,7 +212,7 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 1471a1ebb034..ffe47e9f5bf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -26,20 +26,22 @@
#include "amdgpu_ras.h"
-#define AMDGPU_MAX_JPEG_INSTANCES 2
+#define AMDGPU_MAX_JPEG_INSTANCES 4
+#define AMDGPU_MAX_JPEG_RINGS 8
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
struct amdgpu_jpeg_reg{
- unsigned jpeg_pitch;
+ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
};
struct amdgpu_jpeg_inst {
- struct amdgpu_ring ring_dec;
+ struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
struct amdgpu_irq_src irq;
struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_jpeg_reg external;
+ uint8_t aid_id;
};
struct amdgpu_jpeg_ras {
@@ -49,6 +51,7 @@ struct amdgpu_jpeg_ras {
struct amdgpu_jpeg {
uint8_t num_jpeg_inst;
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ unsigned num_jpeg_rings;
struct amdgpu_jpeg_reg internal;
unsigned harvest_config;
struct delayed_work idle_work;
@@ -57,6 +60,9 @@ struct amdgpu_jpeg {
atomic_t total_submission_cnt;
struct ras_common_if *ras_if;
struct amdgpu_jpeg_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 0efb38539d70..e3531aa3c8bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -462,8 +462,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->jpeg.harvest_config & (1 << i))
continue;
- if (adev->jpeg.inst[i].ring_dec.sched.ready)
- ++num_rings;
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
+ ++num_rings;
}
ib_start_alignment = 16;
ib_size_alignment = 16;
@@ -876,6 +877,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
dev_info->mall_size = adev->gmc.mall_size;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
+ if (!ret) {
+ dev_info->shadow_size = shadow_info.shadow_size;
+ dev_info->shadow_alignment = shadow_info.shadow_alignment;
+ dev_info->csa_size = shadow_info.csa_size;
+ dev_info->csa_alignment = shadow_info.csa_alignment;
+ }
+ }
+
ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info);
@@ -1140,6 +1154,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
kfree(caps);
return r;
}
+ case AMDGPU_INFO_MAX_IBS: {
+ uint32_t max_ibs[AMDGPU_HW_IP_NUM];
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ max_ibs[i] = amdgpu_ring_max_ibs(i);
+
+ return copy_to_user(out, max_ibs,
+ min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -1210,6 +1233,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r)
goto error_pasid;
+ r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
+ if (r)
+ goto error_vm;
+
r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
if (r)
goto error_vm;
@@ -1284,12 +1311,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
- if (amdgpu_mcbp) {
- /* TODO: how to handle reserve failure */
- BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
- amdgpu_vm_bo_del(adev, fpriv->csa_va);
+ if (fpriv->csa_va) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ fpriv->csa_va, csa_addr));
fpriv->csa_va = NULL;
- amdgpu_bo_unreserve(adev->virt.csa_obj);
}
pasid = fpriv->vm.pasid;
@@ -1441,7 +1468,7 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
struct atom_context *ctx = adev->mode_info.atom_context;
@@ -1449,7 +1476,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
int ret, i;
static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
-#define TA_FW_NAME(type) [TA_FW_TYPE_PSP_##type] = #type
+#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
TA_FW_NAME(XGMI),
TA_FW_NAME(RAS),
TA_FW_NAME(HDCP),
@@ -1548,7 +1575,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
fw_info.feature, fw_info.ver);
/* RLCV */
- query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f0f00466b59f..e9091ebfe230 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -924,6 +924,43 @@ error:
return r;
}
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes set shader debugger is not supported!\n");
+ return -EINVAL;
+ }
+
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.u32all = flags;
+ op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
+ memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
+ sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
+
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 14)
+ op_input.set_shader_debugger.trap_en = trap_en;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to set_shader_debugger\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+
+ return r;
+}
+
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
@@ -1305,14 +1342,9 @@ static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
if (!ring)
continue;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- DRM_DEV_ERROR(ring->adev->dev,
- "ring %s test failed (%d)\n",
- ring->name, r);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
return r;
- } else
- DRM_INFO("ring %s test pass\n", ring->name);
r = amdgpu_ring_test_ib(ring, 1000 * 10);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 547ec35691fa..2d6ac30b7135 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -219,6 +219,8 @@ struct mes_add_queue_input {
uint32_t gws_size;
uint64_t tba_addr;
uint64_t tma_addr;
+ uint32_t trap_en;
+ uint32_t skip_process_ctx_clear;
uint32_t is_kfd_process;
uint32_t is_aql_queue;
uint32_t queue_size;
@@ -256,6 +258,7 @@ enum mes_misc_opcode {
MES_MISC_OP_READ_REG,
MES_MISC_OP_WRM_REG_WAIT,
MES_MISC_OP_WRM_REG_WR_WAIT,
+ MES_MISC_OP_SET_SHADER_DEBUGGER,
};
struct mes_misc_op_input {
@@ -278,6 +281,21 @@ struct mes_misc_op_input {
uint32_t reg0;
uint32_t reg1;
} wrm_reg;
+
+ struct {
+ uint64_t process_context_addr;
+ union {
+ struct {
+ uint64_t single_memop : 1;
+ uint64_t single_alu_op : 1;
+ uint64_t reserved: 30;
+ };
+ uint32_t u32all;
+ } flags;
+ uint32_t spi_gdbg_per_vmid_cntl;
+ uint32_t tcp_watch_cntl[4];
+ uint32_t trap_en;
+ } set_shader_debugger;
};
};
@@ -340,6 +358,12 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en);
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index d21bb6dae56e..1ca9d4ed8063 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -21,6 +21,29 @@
#ifndef __AMDGPU_MMHUB_H__
#define __AMDGPU_MMHUB_H__
+enum amdgpu_mmhub_ras_memory_id {
+ AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
+ AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
+ AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
+ AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
+ AMDGPU_MMHUB_WIO_CMDMEM = 4,
+ AMDGPU_MMHUB_RIO_CMDMEM = 5,
+ AMDGPU_MMHUB_WGMI_CMDMEM = 6,
+ AMDGPU_MMHUB_RGMI_CMDMEM = 7,
+ AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
+ AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
+ AMDGPU_MMHUB_MAM_DMEM0 = 10,
+ AMDGPU_MMHUB_MAM_DMEM1 = 11,
+ AMDGPU_MMHUB_MAM_DMEM2 = 12,
+ AMDGPU_MMHUB_MAM_DMEM3 = 13,
+ AMDGPU_MMHUB_WRET_TAGMEM = 19,
+ AMDGPU_MMHUB_RRET_TAGMEM = 20,
+ AMDGPU_MMHUB_WIO_DATAMEM = 21,
+ AMDGPU_MMHUB_WGMI_DATAMEM = 22,
+ AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
+ AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
+};
+
struct amdgpu_mmhub_ras {
struct amdgpu_ras_block_object ras_block;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index c686ff4bcc39..8ab8ae01f87c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -61,6 +61,7 @@ struct amdgpu_nbio_funcs {
u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
u32 (*get_rev_id)(struct amdgpu_device *adev);
@@ -95,6 +96,9 @@ struct amdgpu_nbio_funcs {
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
u32 (*get_rom_offset)(struct amdgpu_device *adev);
+ int (*get_compute_partition_mode)(struct amdgpu_device *adev);
+ u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
+ u32 *supp_modes);
};
struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index a70103ac0026..f7905bce0de1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -131,15 +131,25 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
u32 c = 0;
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
- unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-
- places[c].fpfn = 0;
- places[c].lpfn = 0;
+ unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+
+ if (adev->gmc.mem_partitions && mem_id >= 0) {
+ places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
+ /*
+ * memory partition range lpfn is inclusive start + size - 1
+ * TTM place lpfn is exclusive start + size
+ */
+ places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
+ } else {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ }
places[c].mem_type = TTM_PL_VRAM;
places[c].flags = 0;
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- places[c].lpfn = visible_pfn;
+ places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
@@ -575,6 +585,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->flags = bp->flags;
+ if (adev->gmc.mem_partitions)
+ /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
+ bo->xcp_id = bp->xcp_id_plus1 - 1;
+ else
+ /* For GPUs without spatial partitioning */
+ bo->xcp_id = 0;
+
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -611,7 +628,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
+ r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
if (unlikely(r))
goto fail_unreserve;
@@ -933,7 +950,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
- unsigned fpfn, lpfn;
+ unsigned int fpfn, lpfn;
fpfn = min_offset >> PAGE_SHIFT;
lpfn = max_offset >> PAGE_SHIFT;
@@ -1014,7 +1031,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
}
}
-static const char *amdgpu_vram_names[] = {
+static const char * const amdgpu_vram_names[] = {
"UNKNOWN",
"GDDR1",
"DDR2",
@@ -1042,7 +1059,7 @@ static const char *amdgpu_vram_names[] = {
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* On A+A platform, VRAM can be mapped as WB */
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
/* reserve PAT memory space to WC for VRAM */
int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
adev->gmc.aper_size);
@@ -1078,8 +1095,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
amdgpu_ttm_fini(adev);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
-
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
@@ -1146,8 +1162,8 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
- uint32_t metadata_size, uint64_t flags)
+int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
+ u32 metadata_size, uint64_t flags)
{
struct amdgpu_bo_user *ubo;
void *buffer;
@@ -1266,8 +1282,12 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats)
{
- unsigned int domain;
uint64_t size = amdgpu_bo_size(bo);
+ unsigned int domain;
+
+ /* Abort if the BO doesn't currently have a backing store */
+ if (!bo->tbo.resource)
+ return;
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
switch (domain) {
@@ -1336,7 +1356,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
if (!WARN_ON(r)) {
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 35b8106816a1..05496b97ef93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -56,6 +56,8 @@ struct amdgpu_bo_param {
bool no_wait_gpu;
struct dma_resv *resv;
void (*destroy)(struct ttm_buffer_object *bo);
+ /* xcp partition number plus 1, 0 means any partition */
+ int8_t xcp_id_plus1;
};
/* bo virtual addresses in a vm */
@@ -108,6 +110,13 @@ struct amdgpu_bo {
struct mmu_interval_notifier notifier;
#endif
struct kgd_mem *kfd_bo;
+
+ /*
+ * For GPUs with spatial partitioning, xcp partition number, -1 means
+ * any partition. For other ASICs without spatial partition, always 0
+ * for memory accounting.
+ */
+ int8_t xcp_id;
};
struct amdgpu_bo_user {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a150b7a4b4aa..e15c27e05564 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -146,6 +146,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 0):
adev->virt.autoload_ucode_id = 0;
break;
+ case IP_VERSION(13, 0, 6):
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
case IP_VERSION(13, 0, 10):
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
ret = psp_init_cap_microcode(psp, ucode_prefix);
@@ -329,6 +332,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
bool ret = false;
int i;
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
+ return false;
+
db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
@@ -411,7 +417,7 @@ static int psp_sw_init(void *handle)
if ((psp_get_runtime_db_entry(adev,
PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
&scpm_entry)) &&
- (SCPM_DISABLE != scpm_entry.scpm_status)) {
+ (scpm_entry.scpm_status != SCPM_DISABLE)) {
adev->scpm_enabled = true;
adev->scpm_status = scpm_entry.scpm_status;
} else {
@@ -458,10 +464,9 @@ static int psp_sw_init(void *handle)
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) {
- ret= psp_sysfs_init(adev);
- if (ret) {
+ ret = psp_sysfs_init(adev);
+ if (ret)
return ret;
- }
}
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
@@ -474,7 +479,8 @@ static int psp_sw_init(void *handle)
return ret;
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&psp->fence_buf_bo,
&psp->fence_buf_mc_addr,
&psp->fence_buf);
@@ -482,7 +488,8 @@ static int psp_sw_init(void *handle)
goto failed1;
ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
(void **)&psp->cmd_buf_mem);
if (ret)
@@ -520,6 +527,8 @@ static int psp_sw_fini(void *handle)
kfree(cmd);
cmd = NULL;
+ psp_free_shared_bufs(psp);
+
if (psp->km_ring.ring_mem)
amdgpu_bo_free_kernel(&adev->firmware.rbuf,
&psp->km_ring.ring_mem_mc_addr,
@@ -560,6 +569,26 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
return -ETIME;
}
+int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, uint32_t msec_timeout)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < msec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ msleep(1);
+ }
+
+ return -ETIME;
+}
+
static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
{
switch (cmd_id) {
@@ -643,7 +672,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
- memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
+ memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
/* In some cases, psp response status is not 0 even there is no
* problem while the command is submitted. Some version of PSP FW
@@ -699,8 +728,13 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
{
struct amdgpu_device *adev = psp->adev;
- uint32_t size = amdgpu_bo_size(tmr_bo);
- uint64_t tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ uint32_t size = 0;
+ uint64_t tmr_pa = 0;
+
+ if (tmr_bo) {
+ size = amdgpu_bo_size(tmr_bo);
+ tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ }
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
@@ -745,6 +779,16 @@ static int psp_load_toc(struct psp_context *psp,
return ret;
}
+static bool psp_boottime_tmr(struct psp_context *psp)
+{
+ switch (psp->adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 6):
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
@@ -816,8 +860,9 @@ static int psp_tmr_load(struct psp_context *psp)
cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
- DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
- amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
+ if (psp->tmr_bo)
+ DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
+ amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -828,7 +873,7 @@ static int psp_tmr_load(struct psp_context *psp)
}
static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
- struct psp_gfx_cmd_resp *cmd)
+ struct psp_gfx_cmd_resp *cmd)
{
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
@@ -969,6 +1014,27 @@ static int psp_rl_load(struct amdgpu_device *adev)
return ret;
}
+int psp_spatial_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
+ cmd->cmd.cmd_spatial_part.mode = mode;
+
+ dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
static int psp_asd_initialize(struct psp_context *psp)
{
int ret;
@@ -1065,7 +1131,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
struct ta_context *context)
{
cmd->cmd_id = context->ta_load_type;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;
@@ -1136,9 +1202,8 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context)
context->resp_status = cmd->resp.status;
- if (!ret) {
+ if (!ret)
context->session_id = cmd->resp.session_id;
- }
release_psp_cmd_buf(psp);
@@ -1254,8 +1319,9 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
{
- return psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
- psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b;
+ return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
+ psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6);
}
/*
@@ -1363,6 +1429,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
/* Invoke xgmi ta again to get the link information */
if (psp_xgmi_peer_link_info_supported(psp)) {
struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output;
+ bool requires_reflection =
+ (psp->xgmi_context.supports_extended_data && get_extended_data) ||
+ psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6);
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
@@ -1377,11 +1446,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
topology->nodes[i].num_links = get_extended_data ?
topology->nodes[i].num_links +
link_info_output->nodes[i].num_links :
- link_info_output->nodes[i].num_links;
+ ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links :
+ link_info_output->nodes[i].num_links);
/* reflect the topology information for bi-directionality */
- if (psp->xgmi_context.supports_extended_data &&
- get_extended_data && topology->nodes[i].num_hops)
+ if (requires_reflection && topology->nodes[i].num_hops)
psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
}
}
@@ -1465,8 +1534,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_ras_intr_triggered())
return ret;
- if (ras_cmd->if_version > RAS_TA_HOST_IF_VER)
- {
+ if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
DRM_WARN("RAS: Unsupported Interface");
return -EINVAL;
}
@@ -1476,8 +1544,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
dev_warn(psp->adev->dev, "ECC switch disabled\n");
ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
- }
- else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+ } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
dev_warn(psp->adev->dev,
"RAS internal register access blocked\n");
@@ -1573,11 +1640,10 @@ int psp_ras_initialize(struct psp_context *psp)
if (ret)
dev_warn(adev->dev, "PSP set boot config failed\n");
else
- dev_warn(adev->dev, "GECC will be disabled in next boot cycle "
- "if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
}
} else {
- if (1 == boot_cfg) {
+ if (boot_cfg == 1) {
dev_info(adev->dev, "GECC is enabled\n");
} else {
/* enable GECC in next boot cycle if it is disabled
@@ -1607,8 +1673,11 @@ int psp_ras_initialize(struct psp_context *psp)
if (amdgpu_ras_is_poison_mode_supported(adev))
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
- if (!adev->gmc.xgmi.connected_to_cpu)
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+ ras_cmd->ras_in_message.init_flags.xcc_mask =
+ adev->gfx.xcc_mask;
+ ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
ret = psp_ta_load(psp, &psp->ras_context.context);
@@ -1626,14 +1695,37 @@ int psp_ras_initialize(struct psp_context *psp)
}
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info)
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{
struct ta_ras_shared_memory *ras_cmd;
+ struct amdgpu_device *adev = psp->adev;
int ret;
+ uint32_t dev_mask;
if (!psp->ras_context.context.initialized)
return -EINVAL;
+ switch (info->block_id) {
+ case TA_RAS_BLOCK__GFX:
+ dev_mask = GET_MASK(GC, instance_mask);
+ break;
+ case TA_RAS_BLOCK__SDMA:
+ dev_mask = GET_MASK(SDMA0, instance_mask);
+ break;
+ case TA_RAS_BLOCK__VCN:
+ case TA_RAS_BLOCK__JPEG:
+ dev_mask = GET_MASK(VCN, instance_mask);
+ break;
+ default:
+ dev_mask = instance_mask;
+ break;
+ }
+
+ /* reuse sub_block_index for backward compatibility */
+ dev_mask <<= AMDGPU_RAS_INST_SHIFT;
+ dev_mask &= AMDGPU_RAS_INST_MASK;
+ info->sub_block_index |= dev_mask;
+
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
@@ -2077,10 +2169,12 @@ static int psp_hw_start(struct psp_context *psp)
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo;
- ret = psp_tmr_init(psp);
- if (ret) {
- DRM_ERROR("PSP tmr init failed!\n");
- return ret;
+ if (!psp_boottime_tmr(psp)) {
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ DRM_ERROR("PSP tmr init failed!\n");
+ return ret;
+ }
}
skip_pin_bo:
@@ -2363,7 +2457,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
}
static int psp_execute_non_psp_fw_load(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode)
+ struct amdgpu_firmware_info *ucode)
{
int ret = 0;
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
@@ -2402,9 +2496,8 @@ static int psp_load_smu_fw(struct psp_context *psp)
(adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) {
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
- if (ret) {
+ if (ret)
DRM_WARN("Failed to set MP1 state prepare for reload\n");
- }
}
ret = psp_execute_non_psp_fw_load(psp, ucode);
@@ -2655,8 +2748,6 @@ static int psp_hw_fini(void *handle)
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
- psp_free_shared_bufs(psp);
-
return 0;
}
@@ -2716,9 +2807,8 @@ static int psp_suspend(void *handle)
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
- if (ret) {
+ if (ret)
DRM_ERROR("PSP ring stop failed\n");
- }
out:
return ret;
@@ -2967,7 +3057,7 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->sos.fw_version = le32_to_cpu(desc->fw_version);
psp->sos.feature_version = le32_to_cpu(desc->fw_version);
psp->sos.size_bytes = le32_to_cpu(desc->size_bytes);
- psp->sos.start_addr = ucode_start_addr;
+ psp->sos.start_addr = ucode_start_addr;
break;
case PSP_FW_TYPE_PSP_SYS_DRV:
psp->sys.fw_version = le32_to_cpu(desc->fw_version);
@@ -3491,7 +3581,7 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size
drm_dev_exit(idx);
}
-static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(usbc_pd_fw, 0644,
psp_usbc_pd_fw_sysfs_read,
psp_usbc_pd_fw_sysfs_write);
@@ -3621,6 +3711,7 @@ int amdgpu_psp_sysfs_init(struct amdgpu_device *adev)
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
if (!psp->adev) {
psp->adev = adev;
psp_v13_0_set_psp_funcs(psp);
@@ -3676,8 +3767,7 @@ static void psp_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_usbc_pd_fw);
}
-const struct amdgpu_ip_block_version psp_v3_1_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 3,
.minor = 1,
@@ -3685,8 +3775,7 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 10,
.minor = 0,
@@ -3694,8 +3783,7 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v11_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 11,
.minor = 0,
@@ -3711,8 +3799,7 @@ const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
.funcs = &psp_ip_funcs,
};
-const struct amdgpu_ip_block_version psp_v12_0_ip_block =
-{
+const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 12,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cf4f60c66122..2cae0b1a0b8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -455,6 +455,8 @@ extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, bool check_changed);
+extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t msec_timeout);
int psp_gpu_reset(struct amdgpu_device *adev);
int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
@@ -486,7 +488,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info);
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
int psp_ras_terminate(struct psp_context *psp);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
@@ -519,6 +521,8 @@ int psp_load_fw_list(struct psp_context *psp,
struct amdgpu_firmware_info **ucode_list, int ucode_count);
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
+int psp_spatial_partition(struct psp_context *psp, int mode);
+
int is_psp_fw_valid(struct psp_bin_desc bin);
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3ab8a88789c8..4769a18304d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -171,8 +171,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_data.err_addr = &err_rec;
- amdgpu_umc_fill_error_record(&err_data, address,
- (address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
+ amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
@@ -256,6 +255,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
int block_id;
uint32_t sub_block;
u64 address, value;
+ /* default value is 0 if the mask is not set by user */
+ u32 instance_mask = 0;
if (*pos)
return -EINVAL;
@@ -306,7 +307,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->op = op;
if (op == 2) {
- if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu %u",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
&sub_block, &address, &value) != 3 &&
sscanf(str, "%*s %*s %*s %u %llu %llu",
&sub_block, &address, &value) != 3)
@@ -314,6 +319,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->head.sub_block_index = sub_block;
data->inject.address = address;
data->inject.value = value;
+ data->inject.instance_mask = instance_mask;
}
} else {
if (size < sizeof(*data))
@@ -326,6 +332,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
+static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
+ struct ras_debug_if *data)
+{
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t mask, inst_mask = data->inject.instance_mask;
+
+ /* no need to set instance mask if there is only one instance */
+ if (num_xcc <= 1 && inst_mask) {
+ data->inject.instance_mask = 0;
+ dev_dbg(adev->dev,
+ "RAS inject mask(0x%x) isn't supported and force it to 0.\n",
+ inst_mask);
+
+ return;
+ }
+
+ switch (data->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ mask = GENMASK(num_xcc - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__VCN:
+ case AMDGPU_RAS_BLOCK__JPEG:
+ mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
+ break;
+ default:
+ mask = inst_mask;
+ break;
+ }
+
+ /* remove invalid bits in instance mask */
+ data->inject.instance_mask &= mask;
+ if (inst_mask != data->inject.instance_mask)
+ dev_dbg(adev->dev,
+ "Adjust RAS inject mask 0x%x to 0x%x\n",
+ inst_mask, data->inject.instance_mask);
+}
+
/**
* DOC: AMDGPU RAS debugfs control interface
*
@@ -341,7 +387,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
* name: the name of IP.
*
- * inject has two more members than head, they are address, value.
+ * inject has three more members than head, they are address, value and mask.
* As their names indicate, inject operation will write the
* value to the address.
*
@@ -365,7 +411,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
*
* echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
* echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
- * echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
*
* Where N, is the card which you want to affect.
*
@@ -382,13 +428,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
*
* The sub-block is a the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
+ * The mask means instance mask, is optional, default value is 0x1.
*
* For instance,
*
* .. code-block:: bash
*
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
- * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
*
* How to check the result of the operation?
@@ -442,7 +489,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- if ((data.inject.address >= adev->gmc.mc_vram_size) ||
+ if ((data.inject.address >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
dev_warn(adev->dev, "RAS WARN: input address "
"0x%llx is invalid.",
@@ -460,6 +508,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
break;
}
+ amdgpu_ras_instance_mask_check(adev, &data);
+
/* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
break;
@@ -1115,15 +1165,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
block_info.address);
}
- if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
- if (block_obj->hw_ops->ras_error_inject)
- ret = block_obj->hw_ops->ras_error_inject(adev, info);
+ if (block_obj->hw_ops->ras_error_inject) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
+ ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
+ else /* Special ras_error_inject is defined (e.g: xgmi) */
+ ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
+ info->instance_mask);
} else {
- /* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */
- if (block_obj->hw_ops->ras_error_inject)
- ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
- else /*If not defined .ras_error_inject, use default ras_error_inject*/
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ /* default path */
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
}
if (ret)
@@ -1441,6 +1491,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *dir;
@@ -1451,6 +1502,7 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
&amdgpu_ras_debugfs_eeprom_ops);
debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
&con->bad_page_cnt_threshold);
+ debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
@@ -1597,8 +1649,7 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
{
/* Fatal error events are handled on host side */
- if (amdgpu_sriov_vf(adev) ||
- !amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+ if (amdgpu_sriov_vf(adev))
return;
if (adev->nbio.ras &&
@@ -1636,8 +1687,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
}
}
- if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_poison_handler(adev, false);
+ amdgpu_umc_poison_handler(adev, false);
if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
@@ -2008,9 +2058,15 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
/* Perform full reset in fatal error mode */
if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- else
+ else {
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset_context.method = AMD_RESET_METHOD_MODE2;
+ }
+ }
+
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
atomic_set(&ras->in_recovery, 0);
@@ -2259,7 +2315,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
atomic_set(&con->in_recovery, 0);
con->eeprom_control.bad_channel_bitmap = 0;
- max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
+ max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
/* Todo: During test the SMU might fail to read the eeprom through I2C
@@ -2396,11 +2452,10 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
adev->ras_hw_enabled = adev->ras_enabled = 0;
- if (!adev->is_atom_fw ||
- !amdgpu_ras_asic_supported(adev))
+ if (!amdgpu_ras_asic_supported(adev))
return;
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
@@ -2625,7 +2680,8 @@ release_con:
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
{
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu)
return 1;
return 0;
}
@@ -3104,3 +3160,143 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
return 0;
}
+
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
+{
+ if (!err_type_name)
+ return;
+
+ switch (err_type) {
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ sprintf(err_type_name, "correctable");
+ break;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ sprintf(err_type_name, "uncorrectable");
+ break;
+ default:
+ sprintf(err_type_name, "unknown");
+ break;
+ }
+}
+
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id)
+{
+ uint32_t err_status_lo_data, err_status_lo_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_lo, reg_entry->reg_lo);
+ err_status_lo_data = RREG32(err_status_lo_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
+ !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
+ return false;
+
+ *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
+
+ return true;
+}
+
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt)
+{
+ uint32_t err_status_hi_data, err_status_hi_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_hi, reg_entry->reg_hi);
+ err_status_hi_data = RREG32(err_status_hi_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
+ !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
+ /* keep the check here in case we need to refer to the result later */
+ dev_dbg(adev->dev, "Invalid err_info field\n");
+
+ /* read err count */
+ *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
+
+ return true;
+}
+
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count)
+{
+ uint32_t memory_id;
+ unsigned long err_cnt;
+ char err_type_name[16];
+ uint32_t i, j;
+
+ for (i = 0; i < reg_list_size; i++) {
+ /* query memory_id from err_status_lo */
+ if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
+ instance, &memory_id))
+ continue;
+
+ /* query err_cnt from err_status_hi */
+ if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
+ instance, &err_cnt) ||
+ !err_cnt)
+ continue;
+
+ *err_count += err_cnt;
+
+ /* log the errors */
+ amdgpu_ras_get_error_type_name(err_type, err_type_name);
+ if (!mem_list) {
+ /* memory_list is not supported */
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, memory_id);
+ } else {
+ for (j = 0; j < mem_list_size; j++) {
+ if (memory_id == mem_list[j].memory_id) {
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, mem_list[j].name);
+ break;
+ }
+ }
+ }
+ }
+}
+
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance)
+{
+ uint32_t err_status_lo_offset, err_status_hi_offset;
+ uint32_t i;
+
+ for (i = 0; i < reg_list_size; i++) {
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_lo, reg_list[i].reg_lo);
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_hi, reg_list[i].reg_hi);
+ WREG32(err_status_lo_offset, 0);
+ WREG32(err_status_hi_offset, 0);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 17b3d1992e80..46bf1889a9d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -32,6 +32,11 @@
struct amdgpu_iv_entry;
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
+/* position of instance value in sub_block_index of
+ * ta_ras_trigger_error_input, the sub block uses lower 12 bits
+ */
+#define AMDGPU_RAS_INST_MASK 0xfffff000
+#define AMDGPU_RAS_INST_SHIFT 0xc
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@@ -314,6 +319,45 @@ enum amdgpu_ras_ret {
AMDGPU_RAS_PT,
};
+/* ras error status reisger fields */
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
+#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
+#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
+#define ERR_STATUS__ERR_CNT__SHIFT 0x17
+#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
+
+#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
+ ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
+
+#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
+ (adev->reg_offset[hwip][ip_inst][segment] + (reg))
+
+#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
+#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
+#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
+
+#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
+
+struct amdgpu_ras_err_status_reg_entry {
+ uint32_t hwip;
+ uint32_t ip_inst;
+ uint32_t seg_lo;
+ uint32_t reg_lo;
+ uint32_t seg_hi;
+ uint32_t reg_hi;
+ uint32_t reg_inst;
+ uint32_t flags;
+ const char *block_name;
+};
+
+struct amdgpu_ras_memory_id_entry {
+ uint32_t memory_id;
+ const char *name;
+};
+
struct ras_common_if {
enum amdgpu_ras_block block;
enum amdgpu_ras_error_type type;
@@ -385,6 +429,9 @@ struct amdgpu_ras {
/* Indicates smu whether need update bad channel info */
bool update_channel_flag;
+
+ /* Record special requirements of gpu reset caller */
+ uint32_t gpu_reset_flags;
};
struct ras_fs_data {
@@ -471,6 +518,7 @@ struct ras_inject_if {
struct ras_common_if head;
uint64_t address;
uint64_t value;
+ uint32_t instance_mask;
};
struct ras_cure_if {
@@ -508,7 +556,8 @@ struct amdgpu_ras_block_object {
};
struct amdgpu_ras_block_hw_ops {
- int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+ int (*ras_error_inject)(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
@@ -696,4 +745,25 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj);
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id);
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt);
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count);
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index c2c2a7718613..0648dfe559af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -68,11 +68,24 @@
/* Table hdr is 'AMDR' */
#define RAS_TABLE_HDR_VAL 0x414d4452
-#define RAS_TABLE_VER 0x00010000
/* Bad GPU tag ‘BADG’ */
#define RAS_TABLE_HDR_BAD 0x42414447
+/*
+ * EEPROM Table structure v1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
/* Assume 2-Mbit size EEPROM and take up the whole space. */
#define RAS_TBL_SIZE_BYTES (256 * 1024)
#define RAS_TABLE_START 0
@@ -81,6 +94,35 @@
#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
/ RAS_TABLE_RECORD_SIZE)
+/*
+ * EEPROM Table structrue v2.1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE RAS INFO |
+ * | (available info size 4 Bytes) |
+ * | ( reserved size 252 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* EEPROM Table V2_1 */
+#define RAS_TABLE_V2_1_INFO_SIZE 256
+#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
+#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
+ RAS_TABLE_V2_1_INFO_SIZE)
+#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
* offset off of RAS_TABLE_START. That is, this is something you can
* add to control->i2c_address, and then tell I2C layer to read
@@ -103,6 +145,10 @@
#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
+#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
+
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
@@ -230,6 +276,69 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
return res;
}
+static void
+__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
+
+ tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
+ (((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
+ (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
+ pp[0] = cpu_to_le32(tmp);
+}
+
+static void
+__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
+
+ tmp = le32_to_cpu(pp[0]);
+ rai->rma_status = tmp & 0xFF;
+ rai->health_percent = (tmp >> 8) & 0xFF;
+ rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
+}
+
+static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u8 *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ DRM_ERROR("Failed to alloc buf to write table ras info\n");
+ return -ENOMEM;
+ }
+
+ __encode_table_ras_info_to_buf(&control->tbl_rai, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ DRM_ERROR("Failed to write EEPROM table ras info:%d", res);
+ } else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ DRM_ERROR("Short write:%d out of %d\n",
+ res, RAS_TABLE_V2_1_INFO_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ kfree(buf);
+
+ return res;
+}
+
static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
{
int ii;
@@ -246,6 +355,21 @@ static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
return csum;
}
+static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
+
+ sz = sizeof(control->tbl_rai);
+ pp = (u8 *) &control->tbl_rai;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
static int amdgpu_ras_eeprom_correct_header_tag(
struct amdgpu_ras_eeprom_control *control,
uint32_t header)
@@ -282,6 +406,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
u8 csum;
int res;
@@ -289,14 +414,37 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
mutex_lock(&control->ras_tbl_mutex);
hdr->header = RAS_TABLE_HDR_VAL;
- hdr->version = RAS_TABLE_VER;
- hdr->first_rec_offset = RAS_RECORD_START;
- hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+ if (adev->umc.ras &&
+ adev->umc.ras->set_eeprom_table_version)
+ adev->umc.ras->set_eeprom_table_version(hdr);
+ else
+ hdr->version = RAS_TABLE_VER_V1;
+
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = GPU_HEALTH_USABLE;
+ /**
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully health.
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+ }
csum = __calc_hdr_byte_sum(control);
+ if (hdr->version == RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
csum = -csum;
hdr->checksum = csum;
res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
control->ras_num_recs = 0;
control->ras_fri = 0;
@@ -573,11 +721,19 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
"Saved bad pages %d reaches threshold value %d\n",
control->ras_num_recs, ras->bad_page_cnt_threshold);
control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
}
- control->tbl_hdr.version = RAS_TABLE_VER;
- control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri);
- control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
control->tbl_hdr.checksum = 0;
buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -606,6 +762,17 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
goto Out;
}
+ /**
+ * bad page records have been stored in eeprom,
+ * now calculate gpu health percent
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
+ control->ras_num_recs < ras->bad_page_cnt_threshold)
+ control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
+ control->ras_num_recs) * 100) /
+ ras->bad_page_cnt_threshold;
+
/* Recalc the checksum.
*/
csum = 0;
@@ -613,10 +780,14 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
csum += *pp;
csum += __calc_hdr_byte_sum(control);
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
/* avoid sign extension when assigning to "checksum" */
csum = -csum;
control->tbl_hdr.checksum = csum;
res = __write_table_header(control);
+ if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
Out:
kfree(buf);
return res;
@@ -807,9 +978,12 @@ Out:
return res;
}
-uint32_t amdgpu_ras_eeprom_max_record_count(void)
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
{
- return RAS_MAX_RECORD_COUNT;
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ return RAS_MAX_RECORD_COUNT_V2_1;
+ else
+ return RAS_MAX_RECORD_COUNT;
}
static ssize_t
@@ -1051,8 +1225,14 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
int buf_size, res;
u8 csum, *buf, *pp;
- buf_size = RAS_TABLE_HEADER_SIZE +
- control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+
buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf) {
DRM_ERROR("Out of memory checking RAS table checksum.\n");
@@ -1080,6 +1260,39 @@ Out:
return res < 0 ? res : csum;
}
+static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n");
+ return -ENOMEM;
+ }
+
+ /**
+ * EEPROM table V2_1 supports ras info,
+ * read EEPROM table ras info
+ */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res);
+ res = res >= 0 ? -EIO : res;
+ goto Out;
+ }
+
+ __decode_table_ras_info_from_buf(rai, buf);
+
+Out:
+ kfree(buf);
+ return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
+}
+
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit)
{
@@ -1102,8 +1315,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
return -EINVAL;
control->ras_header_offset = RAS_HDR_START;
- control->ras_record_offset = RAS_RECORD_START;
- control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
mutex_init(&control->ras_tbl_mutex);
/* Read the table header from EEPROM address */
@@ -1117,12 +1329,27 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
__decode_table_header_from_buf(hdr, buf);
- control->ras_num_recs = RAS_NUM_RECS(hdr);
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ } else {
+ control->ras_num_recs = RAS_NUM_RECS(hdr);
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ }
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
if (hdr->header == RAS_TABLE_HDR_VAL) {
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
control->ras_num_recs);
+
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
res = __verify_ras_table_checksum(control);
if (res)
DRM_ERROR("RAS table incorrect checksum or error:%d\n",
@@ -1136,6 +1363,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
ras->bad_page_cnt_threshold);
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
amdgpu_bad_page_threshold != 0) {
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
res = __verify_ras_table_checksum(control);
if (res)
DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 54d9bfe0881d..6dfd667f3013 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -26,8 +26,16 @@
#include <linux/i2c.h>
+#define RAS_TABLE_VER_V1 0x00010000
+#define RAS_TABLE_VER_V2_1 0x00021000
+
struct amdgpu_device;
+enum amdgpu_ras_gpu_health_status {
+ GPU_HEALTH_USABLE = 0,
+ GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
+};
+
enum amdgpu_ras_eeprom_err_type {
AMDGPU_RAS_EEPROM_ERR_NA,
AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
@@ -43,9 +51,18 @@ struct amdgpu_ras_eeprom_table_header {
uint32_t checksum;
} __packed;
+struct amdgpu_ras_eeprom_table_ras_info {
+ u8 rma_status;
+ u8 health_percent;
+ u16 ecc_page_threshold;
+ u32 padding[64 - 1];
+} __packed;
+
struct amdgpu_ras_eeprom_control {
struct amdgpu_ras_eeprom_table_header tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
+
/* Base I2C EEPPROM 19-bit memory address,
* where the table is located. For more information,
* see top of amdgpu_eeprom.c.
@@ -58,6 +75,7 @@ struct amdgpu_ras_eeprom_control {
* right after the header.
*/
u32 ras_header_offset;
+ u32 ras_info_offset;
u32 ras_record_offset;
/* Number of records in the table.
@@ -124,7 +142,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records, const u32 num);
-uint32_t amdgpu_ras_eeprom_max_record_count(void);
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 6437ead87e5f..eec41ad30406 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -40,6 +40,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
ret = aldebaran_reset_init(adev);
break;
case IP_VERSION(11, 0, 7):
@@ -61,6 +62,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
ret = aldebaran_reset_fini(adev);
break;
case IP_VERSION(11, 0, 7):
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 49de3a3eebc7..80d6e132e409 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -50,6 +50,26 @@
*/
/**
+ * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
+ *
+ * @type: ring type for which to return the limit.
+ */
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
+{
+ switch (type) {
+ case AMDGPU_RING_TYPE_GFX:
+ /* Need to keep at least 192 on GFX7+ for old radv. */
+ return 192;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ return 125;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ return 16;
+ default:
+ return 49;
+ }
+}
+
+/**
* amdgpu_ring_alloc - allocate space on the ring buffer
*
* @ring: amdgpu_ring structure holding ring information
@@ -58,7 +78,7 @@
* Allocate @ndw dwords in the ring buffer (all asics).
* Returns 0 on success, error on failure.
*/
-int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{
/* Align requested size with padding so unlock_commit can
* pad safely */
@@ -182,6 +202,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
int sched_hw_submission = amdgpu_sched_hw_submission;
u32 *num_sched;
u32 hw_ip;
+ unsigned int max_ibs_dw;
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
@@ -290,6 +311,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
+
ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
ring->buf_mask = (ring->ring_size / 4) - 1;
@@ -361,6 +389,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
+ } else {
+ kfree(ring->fence_drv.fences);
}
dma_fence_put(ring->vmid_wait);
@@ -403,11 +433,18 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
+ unsigned long flags;
+
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
+ spin_lock_irqsave(fence->lock, flags);
+ if (!dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, -ENODATA);
+ spin_unlock_irqrestore(fence->lock, flags);
+
atomic_inc(&ring->adev->gpu_reset_counter);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
@@ -478,6 +515,70 @@ static const struct file_operations amdgpu_debugfs_ring_fops = {
.llseek = default_llseek
};
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ volatile u32 *mqd;
+ int r;
+ uint32_t value, result;
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ result = 0;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+ if (r) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ while (size) {
+ if (*pos >= ring->mqd_size)
+ goto done;
+
+ value = mqd[*pos/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto done;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+
+done:
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ mqd = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ return r;
+
+ return result;
+}
+
+static const struct file_operations amdgpu_debugfs_mqd_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_mqd_read,
+ .llseek = default_llseek
+};
+
+static int amdgpu_debugfs_ring_error(void *data, u64 val)
+{
+ struct amdgpu_ring *ring = data;
+
+ amdgpu_fence_driver_set_error(ring, val);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
+ amdgpu_debugfs_ring_error, "%lld\n");
+
#endif
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
@@ -489,10 +590,21 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name);
- debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring,
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
&amdgpu_debugfs_ring_fops,
ring->ring_size + 12);
+ if (ring->mqd_obj) {
+ sprintf(name, "amdgpu_mqd_%s", ring->name);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_mqd_fops,
+ ring->mqd_size);
+ }
+
+ sprintf(name, "amdgpu_error_%s", ring->name);
+ debugfs_create_file(name, 0200, root, ring,
+ &amdgpu_debugfs_error_fops);
+
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 2474cb71e476..028ff075db51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -37,8 +37,8 @@ struct amdgpu_job;
struct amdgpu_vm;
/* max number of rings */
-#define AMDGPU_MAX_RINGS 28
-#define AMDGPU_MAX_HWIP_RINGS 8
+#define AMDGPU_MAX_RINGS 124
+#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
@@ -126,6 +126,7 @@ struct amdgpu_fence_driver {
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
@@ -212,6 +213,8 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow, int vmid);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@@ -253,12 +256,14 @@ struct amdgpu_ring {
uint32_t buf_mask;
u32 idx;
u32 xcc_id;
+ u32 xcp_id;
u32 me;
u32 pipe;
u32 queue;
struct amdgpu_bo *mqd_obj;
uint64_t mqd_gpu_addr;
void *mqd_ptr;
+ unsigned mqd_size;
uint64_t eop_gpu_addr;
u32 doorbell_index;
bool use_doorbell;
@@ -312,6 +317,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
@@ -325,6 +331,7 @@ struct amdgpu_ring {
#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 85fb730d9fc8..35e0ae9acadc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -31,12 +31,13 @@
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
*
* @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
*
* Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
*/
-void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
- if (adev->gfx.rlc.in_safe_mode)
+ if (adev->gfx.rlc.in_safe_mode[xcc_id])
return;
/* if RLC is not enabled, do nothing */
@@ -46,8 +47,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
- adev->gfx.rlc.funcs->set_safe_mode(adev);
- adev->gfx.rlc.in_safe_mode = true;
+ adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = true;
}
}
@@ -55,12 +56,13 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
*
* @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
*
* Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
*/
-void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
- if (!(adev->gfx.rlc.in_safe_mode))
+ if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
return;
/* if RLC is not enabled, do nothing */
@@ -70,8 +72,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
- adev->gfx.rlc.funcs->unset_safe_mode(adev);
- adev->gfx.rlc.in_safe_mode = false;
+ adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = false;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 23f060db9255..80b263646966 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -157,8 +157,8 @@ typedef struct _RLC_TABLE_OF_CONTENT {
struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
- void (*set_safe_mode)(struct amdgpu_device *adev);
- void (*unset_safe_mode)(struct amdgpu_device *adev);
+ void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
@@ -201,7 +201,7 @@ struct amdgpu_rlc {
u32 cp_table_size;
/* safe mode for updating CG/PG state */
- bool in_safe_mode;
+ bool in_safe_mode[8];
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
@@ -260,8 +260,8 @@ struct amdgpu_rlc {
struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl;
};
-void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
-void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 231ca06bc9c7..78ec3420ef85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -64,7 +64,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
}
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
- unsigned vmid)
+ unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
uint64_t csa_mc_addr;
@@ -252,6 +252,13 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
if (!duplicate && (instance != i))
continue;
else {
+ /* Use a single copy per SDMA firmware type. PSP uses the same instance for all
+ * groups of SDMAs */
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.num_inst_per_aid == i) {
+ break;
+ }
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
info->fw = adev->sdma.instance[i].fw;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index fc8528812598..513ac22120c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -26,7 +26,7 @@
#include "amdgpu_ras.h"
/* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES 8
+#define AMDGPU_MAX_SDMA_INSTANCES 16
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
@@ -37,9 +37,19 @@ enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE5,
AMDGPU_SDMA_IRQ_INSTANCE6,
AMDGPU_SDMA_IRQ_INSTANCE7,
+ AMDGPU_SDMA_IRQ_INSTANCE8,
+ AMDGPU_SDMA_IRQ_INSTANCE9,
+ AMDGPU_SDMA_IRQ_INSTANCE10,
+ AMDGPU_SDMA_IRQ_INSTANCE11,
+ AMDGPU_SDMA_IRQ_INSTANCE12,
+ AMDGPU_SDMA_IRQ_INSTANCE13,
+ AMDGPU_SDMA_IRQ_INSTANCE14,
+ AMDGPU_SDMA_IRQ_INSTANCE15,
AMDGPU_SDMA_IRQ_LAST
};
+#define NUM_SDMA(x) hweight32(x)
+
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
@@ -49,6 +59,35 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring ring;
struct amdgpu_ring page;
bool burst_nop;
+ uint32_t aid_id;
+};
+
+enum amdgpu_sdma_ras_memory_id {
+ AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
+ AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
+ AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
+ AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
+ AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
+ AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
+ AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
+ AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
+ AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
+ AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
+ AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
+ AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
+ AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
+ AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
+ AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
+ AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
+ AMDGPU_SDMA_UCODE_BUF = 17,
+ AMDGPU_SDMA_RB_CMD_BUF = 18,
+ AMDGPU_SDMA_IB_CMD_BUF = 19,
+ AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
+ AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
+ AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
+ AMDGPU_SDMA_DATA_LUT_FIFO = 23,
+ AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
+ AMDGPU_SDMA_MEMORY_BLOCK_LAST,
};
struct amdgpu_sdma_ras {
@@ -66,6 +105,8 @@ struct amdgpu_sdma {
struct amdgpu_irq_src srbm_write_irq;
int num_instances;
+ uint32_t sdma_mask;
+ int num_inst_per_aid;
uint32_t srbm_soft_reset;
bool has_page_queue;
struct ras_common_if *ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index c7a823f3f2c5..89c38d864471 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -30,6 +30,7 @@ struct amdgpu_smuio_funcs {
void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev);
u32 (*get_socket_id)(struct amdgpu_device *adev);
+ enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2cd081cbf706..0534ab716809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -38,7 +38,6 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
-#include <linux/swiotlb.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>
#include <linux/module.h>
@@ -65,7 +64,7 @@
MODULE_IMPORT_NS(DMA_BUF);
-#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
+#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_tt *ttm,
@@ -184,11 +183,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
- unsigned window, struct amdgpu_ring *ring,
+ unsigned int window, struct amdgpu_ring *ring,
bool tmz, uint64_t *size, uint64_t *addr)
{
struct amdgpu_device *adev = ring->adev;
- unsigned offset, num_pages, num_dw, num_bytes;
+ unsigned int offset, num_pages, num_dw, num_bytes;
uint64_t src_addr, dst_addr;
struct amdgpu_job *job;
void *cpu_addr;
@@ -229,7 +228,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job);
@@ -384,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
+ false);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -631,6 +631,7 @@ struct amdgpu_ttm_tt {
struct task_struct *usertask;
uint32_t userflags;
bool bound;
+ int32_t pool_id;
};
#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
@@ -800,6 +801,44 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
sg_free_table(ttm->sg);
}
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+ struct ttm_tt *ttm, uint64_t flags)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint64_t total_pages = ttm->num_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ int i;
+ uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ /* MQD page: use default flags */
+ amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ 1, &gtt->ttm.dma_address[page_idx], flags);
+ /*
+ * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+ * the second page of the BO onward.
+ */
+ amdgpu_gart_bind(adev,
+ gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+ pages_per_xcc - 1,
+ &gtt->ttm.dma_address[page_idx + 1],
+ ctrl_flags);
+ }
+}
+
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
@@ -812,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
- uint64_t page_idx = 1;
-
- amdgpu_gart_bind(adev, gtt->offset, page_idx,
- gtt->ttm.dma_address, flags);
-
- /* The memory type of the first page defaults to UC. Now
- * modify the memory type to NC from the second page of
- * the BO onward.
- */
- flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
- amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
- ttm->num_pages - page_idx,
- &(gtt->ttm.dma_address[page_idx]), flags);
+ amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
} else {
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags);
@@ -1029,15 +1054,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_ttm_tt *gtt;
enum ttm_caching caching;
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
- if (gtt == NULL) {
+ if (!gtt)
return NULL;
- }
+
gtt->gobj = &bo->base;
+ if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+ gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+ else
+ gtt->pool_id = abo->xcp_id;
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
caching = ttm_write_combined;
@@ -1064,6 +1094,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct ttm_pool *pool;
pgoff_t i;
int ret;
@@ -1078,7 +1109,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
return 0;
- ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+ ret = ttm_pool_alloc(pool, ttm, ctx);
if (ret)
return ret;
@@ -1099,6 +1134,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
{
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
+ struct ttm_pool *pool;
pgoff_t i;
amdgpu_ttm_backend_unbind(bdev, ttm);
@@ -1117,7 +1153,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
ttm->pages[i]->mapping = NULL;
adev = amdgpu_ttm_adev(bdev);
- return ttm_pool_free(&adev->mman.bdev.pool, ttm);
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+
+ return ttm_pool_free(pool, ttm);
}
/**
@@ -1414,7 +1456,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
memcpy(adev->mman.sdma_access_ptr, buf, len);
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
&job);
@@ -1623,14 +1665,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
-static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
{
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
memset(ctx, 0, sizeof(*ctx));
ctx->c2p_train_data_offset =
- ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
@@ -1648,11 +1691,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
- int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;
- if (!amdgpu_sriov_vf(adev)) {
+ if (adev->bios && !amdgpu_sriov_vf(adev)) {
if (amdgpu_atomfirmware_mem_training_supported(adev))
mem_train_support = true;
else
@@ -1666,14 +1710,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip
* discovery data and G6 memory training data respectively
*/
- adev->mman.discovery_tmr_size =
- amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
- if (!adev->mman.discovery_tmr_size)
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+ if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ reserve_size = max(reserve_size, (uint32_t)280 << 20);
+ else if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
- amdgpu_ttm_training_data_block_init(adev);
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
@@ -1687,20 +1735,58 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
- ret = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
- adev->mman.discovery_tmr_size,
- &adev->mman.discovery_memory,
- NULL);
- if (ret) {
- DRM_ERROR("alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
- return ret;
+ if (!adev->gmc.is_app_apu) {
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size,
+ reserve_size, &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
+ NULL, NULL);
+ return ret;
+ }
+ } else {
+ DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
}
return 0;
}
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+ return 0;
+
+ adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+ sizeof(*adev->mman.ttm_pools),
+ GFP_KERNEL);
+ if (!adev->mman.ttm_pools)
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+ ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ false, false);
+ }
+ return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+ return;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+ ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+ kfree(adev->mman.ttm_pools);
+ adev->mman.ttm_pools = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1727,6 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
}
+
+ r = amdgpu_ttm_pools_init(adev);
+ if (r) {
+ DRM_ERROR("failed to init ttm pools(%d).\n", r);
+ return r;
+ }
adev->mman.initialized = true;
/* Initialize VRAM pool with all of VRAM divided into pages */
@@ -1744,6 +1836,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
+ else if (adev->gmc.is_app_apu)
+ DRM_DEBUG_DRIVER(
+ "No need to ioremap when real vram size is 0\n");
else
#endif
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
@@ -1755,9 +1850,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
*place on the VRAM, so reserve it early.
*/
r = amdgpu_ttm_fw_reserve_vram_init(adev);
- if (r) {
+ if (r)
return r;
- }
/*
*The reserved vram for driver must be pinned to the specified
@@ -1781,49 +1875,46 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
- * and driver. */
- r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
- &adev->mman.stolen_vga_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
- adev->mman.stolen_extended_size,
- &adev->mman.stolen_extended_memory,
- NULL);
- if (r)
- return r;
- r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
- adev->mman.stolen_reserved_size,
- &adev->mman.stolen_reserved_memory,
- NULL);
- if (r)
- return r;
+ * and driver.
+ */
+ if (!adev->gmc.is_app_apu) {
+ r = amdgpu_bo_create_kernel_at(adev, 0,
+ adev->mman.stolen_vga_size,
+ &adev->mman.stolen_vga_memory,
+ NULL);
+ if (r)
+ return r;
- DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
- (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
-
- /* Compute GTT size, either based on 1/2 the size of RAM size
- * or whatever the user passed on module init */
- if (amdgpu_gtt_size == -1) {
- struct sysinfo si;
-
- si_meminfo(&si);
- /* Certain GL unit tests for large textures can cause problems
- * with the OOM killer since there is no way to link this memory
- * to a process. This was originally mitigated (but not necessarily
- * eliminated) by limiting the GTT size. The problem is this limit
- * is often too low for many modern games so just make the limit 1/2
- * of system memory which aligns with TTM. The OOM accounting needs
- * to be addressed, but we shouldn't prevent common 3D applications
- * from being usable just to potentially mitigate that corner case.
- */
- gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- (u64)si.totalram * si.mem_unit / 2);
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+ adev->mman.stolen_extended_size,
+ &adev->mman.stolen_extended_memory,
+ NULL);
+
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->mman.stolen_reserved_offset,
+ adev->mman.stolen_reserved_size,
+ &adev->mman.stolen_reserved_memory,
+ NULL);
+ if (r)
+ return r;
} else {
- gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+ DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
}
+ DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
+ (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
+
+ /* Compute GTT size, either based on TTM limit
+ * or whatever the user passed on module init.
+ */
+ if (amdgpu_gtt_size == -1)
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ else
+ gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
if (r) {
@@ -1831,7 +1922,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
- (unsigned)(gtt_size / (1024 * 1024)));
+ (unsigned int)(gtt_size / (1024 * 1024)));
/* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev);
@@ -1858,7 +1949,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
-
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL,
@@ -1874,18 +1964,24 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
int idx;
+
if (!adev->mman.initialized)
return;
+ amdgpu_ttm_pools_fini(adev);
+
amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the stolen vga memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
- /* return the IP Discovery TMR memory back to VRAM */
- amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
- if (adev->mman.stolen_reserved_size)
- amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
- NULL, NULL);
+ if (!adev->gmc.is_app_apu) {
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ if (adev->mman.stolen_reserved_size)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+ NULL, NULL);
+ }
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
amdgpu_ttm_fw_reserve_vram_fini(adev);
@@ -1927,7 +2023,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
int r;
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
- adev->mman.buffer_funcs_enabled == enable)
+ adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
return;
if (enable) {
@@ -1936,7 +2032,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
- r = drm_sched_entity_init(&adev->mman.entity,
+ r = drm_sched_entity_init(&adev->mman.high_pr,
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
@@ -1944,8 +2040,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
r);
return;
}
+
+ r = drm_sched_entity_init(&adev->mman.low_pr,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
} else {
- drm_sched_entity_destroy(&adev->mman.entity);
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.low_pr);
dma_fence_put(man->move);
man->move = NULL;
}
@@ -1957,6 +2063,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
size = adev->gmc.visible_vram_size;
man->size = size;
adev->mman.buffer_funcs_enabled = enable;
+
+ return;
+
+error_free_entity:
+ drm_sched_entity_destroy(&adev->mman.high_pr);
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -1964,14 +2075,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
unsigned int num_dw,
struct dma_resv *resv,
bool vm_needs_flush,
- struct amdgpu_job **job)
+ struct amdgpu_job **job,
+ bool delayed)
{
enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED;
int r;
-
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+ struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+ &adev->mman.high_pr;
+ r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job);
if (r)
@@ -1997,10 +2110,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
bool vm_needs_flush, bool tmz)
{
struct amdgpu_device *adev = ring->adev;
- unsigned num_loops, num_dw;
+ unsigned int num_loops, num_dw;
struct amdgpu_job *job;
uint32_t max_bytes;
- unsigned i;
+ unsigned int i;
int r;
if (!direct_submit && !ring->sched.ready) {
@@ -2012,7 +2125,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
- resv, vm_needs_flush, &job);
+ resv, vm_needs_flush, &job, false);
if (r)
return r;
@@ -2048,7 +2161,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush)
+ bool vm_needs_flush, bool delayed)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
@@ -2061,7 +2174,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
- &job);
+ &job, delayed);
if (r)
return r;
@@ -2084,7 +2197,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **f)
+ struct dma_fence **f,
+ bool delayed)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2113,7 +2227,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
goto error;
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true);
+ &next, true, delayed);
if (r)
goto error;
@@ -2164,7 +2278,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e2cd5894afc9..6d0d66e40db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -49,6 +49,7 @@ struct amdgpu_gtt_mgr {
struct amdgpu_mman {
struct ttm_device bdev;
+ struct ttm_pool *ttm_pools;
bool initialized;
void __iomem *aper_base_kaddr;
@@ -58,8 +59,10 @@ struct amdgpu_mman {
bool buffer_funcs_enabled;
struct mutex gtt_window_lock;
- /* Scheduler entity for buffer moves */
- struct drm_sched_entity entity;
+ /* High priority scheduler entity for buffer moves */
+ struct drm_sched_entity high_pr;
+ /* Low priority scheduler entity for VRAM clearing */
+ struct drm_sched_entity low_pr;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;
@@ -78,7 +81,8 @@ struct amdgpu_mman {
/* discovery */
uint8_t *discovery_bin;
uint32_t discovery_tmr_size;
- struct amdgpu_bo *discovery_memory;
+ /* fw reserved memory */
+ struct amdgpu_bo *fw_reserved_memory;
/* firmware VRAM reservation */
u64 fw_vram_usage_start_offset;
@@ -150,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **fence);
+ struct dma_fence **fence,
+ bool delayed);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index f76b1cb8baf8..16807ff96dc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -748,7 +748,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
u8 *ucode_addr;
- if (NULL == ucode->fw)
+ if (!ucode->fw)
return 0;
ucode->mc_addr = mc_addr;
@@ -972,7 +972,7 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
uint8_t *src_addr = NULL;
uint8_t *dst_addr = NULL;
- if (NULL == ucode->fw)
+ if (!ucode->fw)
return 0;
comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
@@ -1043,6 +1043,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
const struct gfx_firmware_header_v1_0 *cp_hdr;
+
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
adev->firmware.fw_buf_ptr + fw_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 1edf8e6aeb16..db0d94ca4ffc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -169,27 +169,31 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
{
int ret = AMDGPU_RAS_SUCCESS;
- if (!amdgpu_sriov_vf(adev)) {
- if (!adev->gmc.xgmi.connected_to_cpu) {
- struct ras_err_data err_data = {0, 0, 0, NULL};
- struct ras_common_if head = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- };
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
-
- ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
-
- if (ret == AMDGPU_RAS_SUCCESS && obj) {
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
- }
- } else if (reset) {
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ if (reset) {
/* MCA poison handler is only responsible for GPU reset,
* let MCA notifier do page retirement.
*/
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
amdgpu_ras_reset_gpu(adev);
}
+ return ret;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ }
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
adev->virt.ops->ras_poison_handler(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 86133f77a9a4..43321f57f557 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -59,6 +59,8 @@ struct amdgpu_umc_ras {
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
+ /* support different eeprom table version for different asic */
+ void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
};
struct amdgpu_umc_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
index 919d9d401750..107f9bb0e24f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
@@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata {
} srbm;
};
+struct amdgpu_debugfs_regs2_iocdata_v2 {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+ u32 xcc_id;
+};
+
+struct amdgpu_debugfs_gprwave_iocdata {
+ u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
+ struct {
+ u32 thread, vpgr_or_sgpr;
+ } gpr;
+};
+
/*
* MMIO debugfs state data (per file* handle)
*/
struct amdgpu_debugfs_regs2_data {
struct amdgpu_device *adev;
struct mutex lock;
- struct amdgpu_debugfs_regs2_iocdata id;
+ struct amdgpu_debugfs_regs2_iocdata_v2 id;
+};
+
+struct amdgpu_debugfs_gprwave_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_gprwave_iocdata id;
};
enum AMDGPU_DEBUGFS_REGS2_CMDS {
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
+};
+
+enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
+ AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0,
};
+//reg2 interface
#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
+
+//gprwave interface
+#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 6887109abb13..b7441654e6fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -96,16 +96,16 @@
*/
struct amdgpu_uvd_cs_ctx {
struct amdgpu_cs_parser *parser;
- unsigned reg, count;
- unsigned data0, data1;
- unsigned idx;
+ unsigned int reg, count;
+ unsigned int data0, data1;
+ unsigned int idx;
struct amdgpu_ib *ib;
/* does the IB has a msg command */
bool has_msg_cmd;
/* minimum buffer sizes */
- unsigned *buf_sizes;
+ unsigned int *buf_sizes;
};
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -186,7 +186,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned family_id;
+ unsigned int family_id;
int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -275,7 +275,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
if (adev->asic_type < CHIP_VEGA20) {
- unsigned version_major, version_minor;
+ unsigned int version_major, version_minor;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
@@ -420,7 +420,7 @@ int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
int i, j, idx;
bool in_ras_intr = amdgpu_ras_intr_triggered();
@@ -469,7 +469,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
int i, idx;
@@ -491,7 +491,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
adev->uvd.inst[i].saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
- unsigned offset;
+ unsigned int offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
@@ -542,6 +542,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
{
int i;
+
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
@@ -579,7 +580,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
@@ -589,6 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
amdgpu_bo_placement_from_domain(bo, domain);
}
amdgpu_uvd_force_into_uvd_segment(bo);
@@ -609,21 +611,21 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
* Peek into the decode message and calculate the necessary buffer sizes.
*/
static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
- unsigned buf_sizes[])
+ unsigned int buf_sizes[])
{
- unsigned stream_type = msg[4];
- unsigned width = msg[6];
- unsigned height = msg[7];
- unsigned dpb_size = msg[9];
- unsigned pitch = msg[28];
- unsigned level = msg[57];
+ unsigned int stream_type = msg[4];
+ unsigned int width = msg[6];
+ unsigned int height = msg[7];
+ unsigned int dpb_size = msg[9];
+ unsigned int pitch = msg[28];
+ unsigned int level = msg[57];
- unsigned width_in_mb = width / 16;
- unsigned height_in_mb = ALIGN(height / 16, 2);
- unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned int width_in_mb = width / 16;
+ unsigned int height_in_mb = ALIGN(height / 16, 2);
+ unsigned int fs_in_mb = width_in_mb * height_in_mb;
- unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
- unsigned min_ctx_size = ~0;
+ unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
+ unsigned int min_ctx_size = ~0;
image_size = width * height;
image_size += image_size / 2;
@@ -631,7 +633,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
switch (stream_type) {
case 0: /* H264 */
- switch(level) {
+ switch (level) {
case 30:
num_dpb_buffer = 8100 / fs_in_mb;
break;
@@ -709,7 +711,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
break;
case 7: /* H264 Perf */
- switch(level) {
+ switch (level) {
case 30:
num_dpb_buffer = 8100 / fs_in_mb;
break;
@@ -742,7 +744,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
/* reference picture buffer */
min_dpb_size = image_size * num_dpb_buffer;
- if (!adev->uvd.use_ctx_buf){
+ if (!adev->uvd.use_ctx_buf) {
/* macroblock context buffer */
min_dpb_size +=
width_in_mb * height_in_mb * num_dpb_buffer * 192;
@@ -805,7 +807,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
* Make sure that we don't open up to many sessions.
*/
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
- struct amdgpu_bo *bo, unsigned offset)
+ struct amdgpu_bo *bo, unsigned int offset)
{
struct amdgpu_device *adev = ctx->parser->adev;
int32_t *msg, msg_type, handle;
@@ -911,7 +913,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
@@ -930,7 +932,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
if (cmd < 0x4) {
if ((end - start) < ctx->buf_sizes[cmd]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
- (unsigned)(end - start),
+ (unsigned int)(end - start),
ctx->buf_sizes[cmd]);
return -EINVAL;
}
@@ -938,7 +940,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
} else if (cmd == 0x206) {
if ((end - start) < ctx->buf_sizes[4]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
- (unsigned)(end - start),
+ (unsigned int)(end - start),
ctx->buf_sizes[4]);
return -EINVAL;
}
@@ -949,14 +951,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
if (!ctx->parser->adev->uvd.address_64_bit) {
if ((start >> 28) != ((end - 1) >> 28)) {
- DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
+ DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
start, end);
return -EINVAL;
}
if ((cmd == 0 || cmd == 0x3) &&
(start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
- DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
+ DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
start, end);
return -EINVAL;
}
@@ -990,7 +992,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
ctx->idx++;
for (i = 0; i <= ctx->count; ++i) {
- unsigned reg = ctx->reg + i;
+ unsigned int reg = ctx->reg + i;
if (ctx->idx >= ctx->ib->length_dw) {
DRM_ERROR("Register command after end of CS!\n");
@@ -1036,7 +1038,8 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
- unsigned type = CP_PACKET_GET_TYPE(cmd);
+ unsigned int type = CP_PACKET_GET_TYPE(cmd);
+
switch (type) {
case PACKET_TYPE0:
ctx->reg = CP_PACKET0_GET_REG(cmd);
@@ -1070,7 +1073,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_ib *ib)
{
struct amdgpu_uvd_cs_ctx ctx = {};
- unsigned buf_sizes[] = {
+ unsigned int buf_sizes[] = {
[0x00000000] = 2048,
[0x00000001] = 0xFFFFFFFF,
[0x00000002] = 0xFFFFFFFF,
@@ -1185,8 +1188,9 @@ err_free:
}
/* multiple fence commands without any stream commands in between can
- crash the vcpu so just try to emmit a dummy create/destroy msg to
- avoid this */
+ * crash the vcpu so just try to emmit a dummy create/destroy msg to
+ * avoid this
+ */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
@@ -1252,15 +1256,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
- unsigned fences = 0, i, j;
+ unsigned int fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
- for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
- }
}
if (fences == 0) {
@@ -1356,7 +1359,7 @@ error:
*/
uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned int i;
uint32_t used_handles = 0;
for (i = 0; i < adev->uvd.max_handles; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index e2b7324a70cb..1904edf68407 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -99,7 +99,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned ucode_version, version_major, version_minor, binary_id;
+ unsigned int ucode_version, version_major, version_minor, binary_id;
int i, r;
switch (adev->asic_type) {
@@ -207,7 +207,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
*/
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned int i;
if (adev->vce.vcpu_bo == NULL)
return 0;
@@ -286,7 +286,7 @@ int amdgpu_vce_resume(struct amdgpu_device *adev)
{
void *cpu_addr;
const struct common_firmware_header *hdr;
- unsigned offset;
+ unsigned int offset;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -332,7 +332,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vce.idle_work.work);
- unsigned i, count = 0;
+ unsigned int i, count = 0;
for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
@@ -409,6 +409,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
struct amdgpu_ring *ring = &adev->vce.ring[0];
int i, r;
+
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
uint32_t handle = atomic_read(&adev->vce.handles[i]);
@@ -436,7 +437,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 1024;
+ const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct amdgpu_ib ib_msg;
@@ -528,7 +529,7 @@ err:
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence)
{
- const unsigned ib_size_dw = 1024;
+ const unsigned int ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
@@ -596,12 +597,12 @@ err:
*/
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
struct amdgpu_ib *ib, int lo, int hi,
- unsigned size, int32_t index)
+ unsigned int size, int32_t index)
{
int64_t offset = ((uint64_t)size) * ((int64_t)index);
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *mapping;
- unsigned i, fpfn, lpfn;
+ unsigned int i, fpfn, lpfn;
struct amdgpu_bo *bo;
uint64_t addr;
int r;
@@ -619,7 +620,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index);
return r;
}
@@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
* Patch relocation inside command stream with real buffer address
*/
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
- int lo, int hi, unsigned size, uint32_t index)
+ int lo, int hi, unsigned int size, uint32_t index)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
@@ -662,14 +663,14 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
if (r) {
- DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
addr, lo, hi, size, index);
return r;
}
if ((addr + (uint64_t)size) >
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
+ DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
addr, lo, hi);
return -EINVAL;
}
@@ -692,12 +693,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
* @allocated: allocated a new handle?
*
* Validates the handle and return the found session index or -EINVAL
- * we we don't have another free session index.
+ * we don't have another free session index.
*/
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
uint32_t handle, uint32_t *allocated)
{
- unsigned i;
+ unsigned int i;
/* validate the handle */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
@@ -735,14 +736,14 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
- unsigned fb_idx = 0, bs_idx = 0;
+ unsigned int fb_idx = 0, bs_idx = 0;
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
uint32_t *size = &tmp;
- unsigned idx;
+ unsigned int idx;
int i, r = 0;
job->vm = NULL;
@@ -1084,7 +1085,7 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
*
*/
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
+ unsigned int flags)
{
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@@ -1106,7 +1107,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr;
- unsigned i;
+ unsigned int i;
int r, timeout = adev->usec_timeout;
/* skip ring test for sriov*/
@@ -1171,7 +1172,7 @@ error:
enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
{
- switch(ring) {
+ switch (ring) {
case 0:
return AMDGPU_RING_PRIO_0;
case 1:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2d94f1b63bd6..acbef1a24b9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -56,6 +56,7 @@
#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
@@ -77,6 +78,7 @@ MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -167,7 +169,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)){
+ if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) {
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
} else {
@@ -233,11 +235,11 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << j))
continue;
- if (adev->vcn.indirect_sram) {
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
- &adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
- }
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[j].dpg_sram_bo,
+ &adev->vcn.inst[j].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+
kvfree(adev->vcn.inst[j].saved_bo);
amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
@@ -274,20 +276,19 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t
bool ret = false;
int vcn_config = adev->vcn.vcn_config[vcn_instance];
- if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
+ if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
ret = true;
- } else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) {
+ else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
ret = true;
- } else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
+ else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
ret = true;
- }
return ret;
}
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
int i, idx;
@@ -316,7 +317,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
- unsigned size;
+ unsigned int size;
void *ptr;
int i, idx;
@@ -338,7 +339,7 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
adev->vcn.inst[i].saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
- unsigned offset;
+ unsigned int offset;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
@@ -369,9 +370,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (adev->vcn.harvest_config & (1 << j))
continue;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
- }
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
@@ -458,7 +458,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
- unsigned i;
+ unsigned int i;
int r;
/* VCN in SRIOV does not support direct register read/write */
@@ -795,7 +795,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr;
- unsigned i;
+ unsigned int i;
int r;
if (amdgpu_sriov_vf(adev))
@@ -993,11 +993,14 @@ error:
int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
+ struct amdgpu_device *adev = ring->adev;
long r;
- r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
- if (r)
- goto error;
+ if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) {
+ r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
+ if (r)
+ goto error;
+ }
r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
@@ -1007,7 +1010,7 @@ error:
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
- switch(ring) {
+ switch (ring) {
case 0:
return AMDGPU_RING_PRIO_0;
case 1:
@@ -1026,6 +1029,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
+
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
@@ -1041,6 +1045,9 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
adev->firmware.ucode[idx].fw = adev->vcn.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3))
+ break;
}
dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
}
@@ -1051,7 +1058,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
*/
#if defined(CONFIG_DEBUG_FS)
static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
- size_t size, loff_t *pos)
+ size_t size, loff_t *pos)
{
struct amdgpu_vcn_inst *vcn;
void *log_buf;
@@ -1097,7 +1104,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
read_pos = plog->header_size;
if (read_num[i] == copy_to_user((buf + read_bytes),
- (log_buf + read_pos), read_num[i]))
+ (log_buf + read_pos), read_num[i]))
return -EFAULT;
read_bytes += read_num[i];
@@ -1118,7 +1125,7 @@ static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
#endif
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
- struct amdgpu_vcn_inst *vcn)
+ struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
@@ -1126,7 +1133,7 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
char name[32];
sprintf(name, "amdgpu_vcn_%d_fwlog", i);
- debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn,
+ debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
&amdgpu_debugfs_vcnfwlog_fops,
AMDGPU_VCNFW_LOG_SIZE);
#endif
@@ -1140,7 +1147,7 @@ void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
- + vcn->fw_shared.log_offset;
+ + vcn->fw_shared.log_offset;
*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
fw_log->is_enabled = 1;
fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index f1397ef66fd7..92d5534df5f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -32,7 +32,7 @@
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
-#define AMDGPU_MAX_VCN_INSTANCES 2
+#define AMDGPU_MAX_VCN_INSTANCES 4
#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
@@ -141,18 +141,23 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
- do { \
- if (!indirect) { \
- WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
- (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- } else { \
- *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
- *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \
- } \
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
} while (0)
#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
@@ -243,6 +248,7 @@ struct amdgpu_vcn_inst {
uint32_t *dpg_sram_curr_addr;
atomic_t dpg_enc_submission_cnt;
struct amdgpu_vcn_fw_shared fw_shared;
+ uint8_t aid_id;
};
struct amdgpu_vcn_ras {
@@ -272,6 +278,9 @@ struct amdgpu_vcn {
struct ras_common_if *ras_if;
struct amdgpu_vcn_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f2e2cbaa7fde..25b4d7f0bd35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -56,7 +56,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
/* enable virtual display */
if (adev->asic_type != CHIP_ALDEBARAN &&
- adev->asic_type != CHIP_ARCTURUS) {
+ adev->asic_type != CHIP_ARCTURUS &&
+ ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
if (adev->mode_info.num_crtc == 0)
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
@@ -65,16 +66,19 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
adev->cg_flags = 0;
adev->pg_flags = 0;
- /* enable mcbp for sriov asic_type before soc21 */
- amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0;
+ /* enable mcbp for sriov */
+ amdgpu_mcbp = 1;
+ /* Reduce kcq number to 2 to reduce latency */
+ if (amdgpu_num_kcq == -1)
+ amdgpu_num_kcq = 2;
}
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
signed long r, cnt = 0;
unsigned long flags;
@@ -557,7 +561,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
- POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_IMU, adev->gfx.imu_fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
adev->psp.asd_context.bin_desc.fw_version);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3c0310576b3b..143d11afe0e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -267,6 +267,32 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
}
/**
+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
+ * @vm: the VM which state machine to reset
+ *
+ * Move all vm_bo object in the VM into a state where they will be updated
+ * again during validation.
+ */
+static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_bo_base *vm_bo, *tmp;
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->done, &vm->invalidated);
+ list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
+ vm_bo->moved = true;
+ list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = vm_bo->bo;
+
+ if (!bo || bo->tbo.type != ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ else if (bo->parent)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
* @base: base structure for tracking BO usage in a VM
@@ -351,6 +377,58 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
spin_unlock(&adev->mman.bdev.lru_lock);
}
+/* Create scheduler entities for page table updates */
+static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ int r;
+
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error;
+
+ return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+
+error:
+ drm_sched_entity_destroy(&vm->immediate);
+ return r;
+}
+
+/* Destroy the entities for page table updates again */
+static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
+{
+ drm_sched_entity_destroy(&vm->immediate);
+ drm_sched_entity_destroy(&vm->delayed);
+}
+
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token to allow checking if submissions
+ * are still valid to use this VM. The VM parameter might be NULL in which case
+ * just the VRAM lost counter will be used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+ if (!vm)
+ return result;
+
+ result += vm->generation;
+ /* Add one if the page tables will be re-generated on next CS */
+ if (drm_sched_entity_error(&vm->delayed))
+ ++result;
+
+ return result;
+}
+
/**
* amdgpu_vm_validate_pt_bos - validate the page table BOs
*
@@ -373,6 +451,15 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo;
int r;
+ if (drm_sched_entity_error(&vm->delayed)) {
+ ++vm->generation;
+ amdgpu_vm_bo_reset_state_machine(vm);
+ amdgpu_vm_fini_entities(vm);
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+ }
+
spin_lock(&vm->status_lock);
while (!list_empty(&vm->evicted)) {
bo_base = list_first_entry(&vm->evicted,
@@ -920,42 +1007,51 @@ error_unlock:
return r;
}
+static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
+ struct amdgpu_mem_stats *stats)
+{
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+
+ if (!bo)
+ return;
+
+ /*
+ * For now ignore BOs which are currently locked and potentially
+ * changing their location.
+ */
+ if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
+ !dma_resv_trylock(bo->tbo.base.resv))
+ return;
+
+ amdgpu_bo_get_memory(bo, stats);
+ if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ dma_resv_unlock(bo->tbo.base.resv);
+}
+
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
struct amdgpu_mem_stats *stats)
{
struct amdgpu_bo_va *bo_va, *tmp;
spin_lock(&vm->status_lock);
- list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, stats);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, stats);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, stats);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, stats);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, stats);
- }
- list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
- if (!bo_va->base.bo)
- continue;
- amdgpu_bo_get_memory(bo_va->base.bo, stats);
- }
+ list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
spin_unlock(&vm->status_lock);
}
@@ -1358,6 +1454,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1;
+ bo_va->last_pt_update = dma_fence_get_stub();
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
@@ -1433,14 +1530,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
uint64_t eaddr;
/* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
- size == 0 || size & ~PAGE_MASK)
+ if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+ return -EINVAL;
+ if (saddr + size <= saddr || offset + size <= offset)
return -EINVAL;
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
- if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)) ||
+ if ((bo && offset + size > amdgpu_bo_size(bo)) ||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
@@ -1499,14 +1596,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
int r;
/* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
- size == 0 || size & ~PAGE_MASK)
+ if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+ return -EINVAL;
+ if (saddr + size <= saddr || offset + size <= offset)
return -EINVAL;
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
- if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)) ||
+ if ((bo && offset + size > amdgpu_bo_size(bo)) ||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
@@ -2038,19 +2135,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->pt_freed);
INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
- /* create scheduler entities for page table updates */
- r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
- adev->vm_manager.vm_pte_scheds,
- adev->vm_manager.vm_pte_num_scheds, NULL);
+ r = amdgpu_vm_init_entities(adev, vm);
if (r)
return r;
- r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
- adev->vm_manager.vm_pte_scheds,
- adev->vm_manager.vm_pte_num_scheds, NULL);
- if (r)
- goto error_free_immediate;
-
vm->pte_support_ats = false;
vm->is_compute_context = false;
@@ -2067,9 +2155,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->update_funcs = &amdgpu_vm_cpu_funcs;
else
vm->update_funcs = &amdgpu_vm_sdma_funcs;
- vm->last_update = NULL;
+
+ vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub();
vm->last_tlb_flush = dma_fence_get_stub();
+ vm->generation = 0;
mutex_init(&vm->eviction_lock);
vm->evicting = false;
@@ -2110,10 +2200,7 @@ error_free_root:
error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
- drm_sched_entity_destroy(&vm->delayed);
-
-error_free_immediate:
- drm_sched_entity_destroy(&vm->immediate);
+ amdgpu_vm_fini_entities(vm);
return r;
}
@@ -2192,7 +2279,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
goto unreserve_bo;
dma_fence_put(vm->last_update);
- vm->last_update = NULL;
+ vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
/* Free the shadow bo for compute VM */
@@ -2266,8 +2353,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&root);
WARN_ON(vm->root.bo);
- drm_sched_entity_destroy(&vm->immediate);
- drm_sched_entity_destroy(&vm->delayed);
+ amdgpu_vm_fini_entities(vm);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
@@ -2282,8 +2368,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
dma_fence_put(vm->last_update);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
- amdgpu_vmid_free_reserved(adev, vm, i);
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+ if (vm->reserved_vmid[i]) {
+ amdgpu_vmid_free_reserved(adev, i);
+ vm->reserved_vmid[i] = false;
+ }
+ }
+
}
/**
@@ -2366,18 +2458,25 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
- int r;
+
+ /* No valid flags defined yet */
+ if (args->in.flags)
+ return -EINVAL;
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
- AMDGPU_GFXHUB_0);
- if (r)
- return r;
+ if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+ fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
+ }
+
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
- amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+ if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
+ fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
+ }
break;
default:
return -EINVAL;
@@ -2432,6 +2531,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ * GFX 9.4.3.
* @addr: Address of the fault
* @write_fault: true is write fault, false is read fault
*
@@ -2439,7 +2541,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- uint64_t addr, bool write_fault)
+ u32 vmid, u32 node_id, uint64_t addr,
+ bool write_fault)
{
bool is_compute_context = false;
struct amdgpu_bo *root;
@@ -2463,8 +2566,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE;
- if (is_compute_context &&
- !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
+ if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr, write_fault)) {
amdgpu_bo_unref(&root);
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6f085f0b4ef3..9c85d494f2a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -111,11 +111,14 @@ struct amdgpu_mem_stats;
/* Reserve 4MB VRAM for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
-/* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS 3
-#define AMDGPU_GFXHUB_0 0
-#define AMDGPU_MMHUB_0 1
-#define AMDGPU_MMHUB_1 2
+/*
+ * max number of VMHUB
+ * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
+ */
+#define AMDGPU_MAX_VMHUBS 13
+#define AMDGPU_GFXHUB(x) (x)
+#define AMDGPU_MMHUB0(x) (8 + x)
+#define AMDGPU_MMHUB1(x) (8 + 4 + x)
/* Reserve 2MB at top/bottom of address space for kernel use */
#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
@@ -292,6 +295,9 @@ struct amdgpu_vm {
atomic64_t tlb_seq;
struct dma_fence *last_tlb_flush;
+ /* How many times we had to re-generate the page tables */
+ uint64_t generation;
+
/* Last unlocked submission to the scheduler entities */
struct dma_fence *last_unlocked;
@@ -326,6 +332,9 @@ struct amdgpu_vm {
struct ttm_lru_bulk_move lru_bulk_move;
/* Flag to indicate if VM is used for compute */
bool is_compute_context;
+
+ /* Memory partition number, -1 means any partition */
+ int8_t mem_id;
};
struct amdgpu_vm_manager {
@@ -391,6 +400,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
struct amdgpu_bo_list_entry *entry);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*callback)(void *p, struct amdgpu_bo *bo),
void *param);
@@ -452,7 +462,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
struct amdgpu_task_info *task_info);
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- uint64_t addr, bool write_fault);
+ u32 vmid, u32 node_id, uint64_t addr,
+ bool write_fault);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 051c7194ab4f..dea1a64be44d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -502,6 +502,7 @@ exit:
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int level, bool immediate, struct amdgpu_bo_vm **vmbo)
{
+ struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
struct amdgpu_bo_param bp;
struct amdgpu_bo *bo;
struct dma_resv *resv;
@@ -512,7 +513,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.size = amdgpu_vm_pt_size(adev, level);
bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ if (!adev->gmc.is_app_apu)
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ else
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -529,6 +535,8 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
bp.no_wait_gpu = immediate;
+ bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+
if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv;
@@ -553,6 +561,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
@@ -780,13 +789,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
uint64_t pe, uint64_t addr,
unsigned int count, uint32_t incr,
uint64_t flags)
-
{
+ struct amdgpu_device *adev = params->adev;
+
if (level != AMDGPU_VM_PTB) {
flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+ amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
- } else if (params->adev->asic_type >= CHIP_VEGA10 &&
+ } else if (adev->asic_type >= CHIP_VEGA10 &&
!(flags & AMDGPU_PTE_VALID) &&
!(flags & AMDGPU_PTE_PRT)) {
@@ -794,6 +804,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
flags |= AMDGPU_PTE_EXECUTABLE;
}
+ /* APUs mapping system memory may need different MTYPEs on different
+ * NUMA nodes. Only do this for contiguous ranges that can be assumed
+ * to be on the same NUMA node.
+ */
+ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+ adev->gmc.gmc_funcs->override_vm_pte_flags &&
+ num_possible_nodes() > 1) {
+ if (!params->pages_addr)
+ amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
+ addr, &flags);
+ else
+ dev_dbg(adev->dev,
+ "override_vm_pte_flags skipped: non-contiguous\n");
+ }
+
params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
flags);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index afacfb9b5bf6..c7085a747b03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -370,6 +370,45 @@ out:
return ret;
}
+static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
+}
+
+static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
+ return false;
+}
+
+static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
+ return true;
+}
+
+static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
+}
+
+static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
+ return -ENOSPC;
+}
+
/**
* amdgpu_vram_mgr_new - allocate new ranges
*
@@ -818,6 +857,14 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
mutex_unlock(&mgr->lock);
}
+static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
+ .alloc = amdgpu_dummy_vram_mgr_new,
+ .free = amdgpu_dummy_vram_mgr_del,
+ .intersects = amdgpu_dummy_vram_mgr_intersects,
+ .compatible = amdgpu_dummy_vram_mgr_compatible,
+ .debug = amdgpu_dummy_vram_mgr_debug
+};
+
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc = amdgpu_vram_mgr_new,
.free = amdgpu_vram_mgr_del,
@@ -842,17 +889,22 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);
- man->func = &amdgpu_vram_mgr_func;
-
- err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
- if (err)
- return err;
-
mutex_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
mgr->default_page_size = PAGE_SIZE;
+ if (!adev->gmc.is_app_apu) {
+ man->func = &amdgpu_vram_mgr_func;
+
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
+ } else {
+ man->func = &amdgpu_dummy_vram_mgr_func;
+ DRM_INFO("Setup dummy vram mgr\n");
+ }
+
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
return 0;
@@ -887,7 +939,8 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
drm_buddy_free_list(&mgr->mm, &rsv->allocated);
kfree(rsv);
}
- drm_buddy_fini(&mgr->mm);
+ if (!adev->gmc.is_app_apu)
+ drm_buddy_fini(&mgr->mm);
mutex_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
new file mode 100644
index 000000000000..d733fa6e7477
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_drv.h"
+
+#include <drm/drm_drv.h>
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
+{
+ int (*run_func)(void *handle, uint32_t inst_mask);
+ int ret = 0;
+
+ if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
+ return 0;
+
+ run_func = NULL;
+
+ switch (xcp_state) {
+ case AMDGPU_XCP_PREPARE_SUSPEND:
+ run_func = xcp_ip->ip_funcs->prepare_suspend;
+ break;
+ case AMDGPU_XCP_SUSPEND:
+ run_func = xcp_ip->ip_funcs->suspend;
+ break;
+ case AMDGPU_XCP_PREPARE_RESUME:
+ run_func = xcp_ip->ip_funcs->prepare_resume;
+ break;
+ case AMDGPU_XCP_RESUME:
+ run_func = xcp_ip->ip_funcs->resume;
+ break;
+ }
+
+ if (run_func)
+ ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
+
+ return ret;
+}
+
+static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ int state)
+{
+ struct amdgpu_xcp_ip *xcp_ip;
+ struct amdgpu_xcp *xcp;
+ int i, ret;
+
+ if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
+ return -EINVAL;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
+ xcp_ip = &xcp->ip[i];
+ ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_SUSPEND);
+}
+
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
+}
+
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_RESUME);
+}
+
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
+}
+
+static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_xcp *xcp;
+
+ if (!ip)
+ return;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ xcp->ip[ip->ip_id] = *ip;
+ xcp->ip[ip->ip_id].valid = true;
+
+ xcp->valid = true;
+}
+
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ struct amdgpu_xcp_ip ip;
+ uint8_t mem_id;
+ int i, j, ret;
+
+ if (!num_xcps || num_xcps > MAX_XCP)
+ return -EINVAL;
+
+ xcp_mgr->mode = mode;
+
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].valid = false;
+
+ for (i = 0; i < num_xcps; ++i) {
+ for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
+ ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
+ &ip);
+ if (ret)
+ continue;
+
+ __amdgpu_xcp_add_block(xcp_mgr, i, &ip);
+ }
+
+ xcp_mgr->xcp[i].id = i;
+
+ if (xcp_mgr->funcs->get_xcp_mem_id) {
+ ret = xcp_mgr->funcs->get_xcp_mem_id(
+ xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
+ if (ret)
+ continue;
+ else
+ xcp_mgr->xcp[i].mem_id = mem_id;
+ }
+ }
+
+ xcp_mgr->num_xcps = num_xcps;
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
+ return 0;
+}
+
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int ret, curr_mode, num_xcps = 0;
+
+ if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
+ return -EINVAL;
+
+ if (xcp_mgr->mode == mode)
+ return 0;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
+ return 0;
+
+ mutex_lock(&xcp_mgr->xcp_lock);
+
+ curr_mode = xcp_mgr->mode;
+ /* State set to transient mode */
+ xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
+
+ ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
+
+ if (ret) {
+ /* Failed, get whatever mode it's at now */
+ if (xcp_mgr->funcs->query_partition_mode)
+ xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
+ xcp_mgr, AMDGPU_XCP_FL_LOCKED);
+ else
+ xcp_mgr->mode = curr_mode;
+
+ goto out;
+ }
+
+out:
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return ret;
+}
+
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int mode;
+
+ if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return xcp_mgr->mode;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
+ return xcp_mgr->mode;
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_lock(&xcp_mgr->xcp_lock);
+ mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
+ dev_WARN(
+ xcp_mgr->adev->dev,
+ "Cached partition mode %d not matching with device mode %d",
+ xcp_mgr->mode, mode);
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return mode;
+}
+
+static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ struct drm_device *ddev;
+ int i, ret;
+
+ ddev = adev_to_drm(adev);
+
+ for (i = 0; i < MAX_XCP; i++) {
+ ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
+ if (ret)
+ return ret;
+
+ /* Redirect all IOCTLs to the primary device */
+ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
+ adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
+ adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
+ adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
+ p_ddev->render->dev = ddev;
+ p_ddev->primary->dev = ddev;
+ p_ddev->vma_offset_manager = ddev->vma_offset_manager;
+ p_ddev->driver = &amdgpu_partition_driver;
+ adev->xcp_mgr->xcp[i].ddev = p_ddev;
+ }
+
+ return 0;
+}
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_num_xcps,
+ struct amdgpu_xcp_mgr_funcs *xcp_funcs)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr;
+
+ if (!xcp_funcs || !xcp_funcs->switch_partition_mode ||
+ !xcp_funcs->get_ip_details)
+ return -EINVAL;
+
+ xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
+
+ if (!xcp_mgr)
+ return -ENOMEM;
+
+ xcp_mgr->adev = adev;
+ xcp_mgr->funcs = xcp_funcs;
+ xcp_mgr->mode = init_mode;
+ mutex_init(&xcp_mgr->xcp_lock);
+
+ if (init_mode != AMDGPU_XCP_MODE_NONE)
+ amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
+
+ adev->xcp_mgr = xcp_mgr;
+
+ return amdgpu_xcp_dev_alloc(adev);
+}
+
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance)
+{
+ struct amdgpu_xcp *xcp;
+ int i, id_mask = 0;
+
+ if (ip >= AMDGPU_XCP_MAX_BLOCKS)
+ return -EINVAL;
+
+ for (i = 0; i < xcp_mgr->num_xcps; ++i) {
+ xcp = &xcp_mgr->xcp[i];
+ if ((xcp->valid) && (xcp->ip[ip].valid) &&
+ (xcp->ip[ip].inst_mask & BIT(instance)))
+ id_mask |= BIT(i);
+ }
+
+ if (!id_mask)
+ id_mask = -ENXIO;
+
+ return id_mask;
+}
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask)
+{
+ if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
+ return -EINVAL;
+
+ *inst_mask = xcp->ip[ip].inst_mask;
+
+ return 0;
+}
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent)
+{
+ int i, ret;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ int i;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ p_ddev = adev->xcp_mgr->xcp[i].ddev;
+ drm_dev_unplug(p_ddev);
+ p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
+ p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
+ p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
+ p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
+ }
+}
+
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv)
+{
+ int i;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ fpriv->xcp_id = ~0;
+ for (i = 0; i < MAX_XCP; ++i) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
+ if (adev->xcp_mgr->xcp[i].valid == FALSE) {
+ dev_err(adev->dev, "renderD%d partition %d not valid!",
+ file_priv->minor->index, i);
+ return -ENOENT;
+ }
+ dev_dbg(adev->dev, "renderD%d partition %d opened!",
+ file_priv->minor->index, i);
+ fpriv->xcp_id = i;
+ break;
+ }
+ }
+
+ fpriv->vm.mem_id = fpriv->xcp_id == ~0 ? -1 :
+ adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
+ return 0;
+}
+
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ struct drm_gpu_scheduler *sched;
+ struct amdgpu_ring *ring;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ sched = entity->entity.rq->sched;
+ if (sched->ready) {
+ ring = to_amdgpu_ring(entity->entity.rq->sched);
+ atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
new file mode 100644
index 000000000000..0f8026d64ea5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_XCP_H
+#define AMDGPU_XCP_H
+
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "amdgpu_ctx.h"
+
+#define MAX_XCP 8
+
+#define AMDGPU_XCP_MODE_NONE -1
+#define AMDGPU_XCP_MODE_TRANS -2
+
+#define AMDGPU_XCP_FL_NONE 0
+#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+
+struct amdgpu_fpriv;
+
+enum AMDGPU_XCP_IP_BLOCK {
+ AMDGPU_XCP_GFXHUB,
+ AMDGPU_XCP_GFX,
+ AMDGPU_XCP_SDMA,
+ AMDGPU_XCP_VCN,
+ AMDGPU_XCP_MAX_BLOCKS
+};
+
+enum AMDGPU_XCP_STATE {
+ AMDGPU_XCP_PREPARE_SUSPEND,
+ AMDGPU_XCP_SUSPEND,
+ AMDGPU_XCP_PREPARE_RESUME,
+ AMDGPU_XCP_RESUME,
+};
+
+struct amdgpu_xcp_ip_funcs {
+ int (*prepare_suspend)(void *handle, uint32_t inst_mask);
+ int (*suspend)(void *handle, uint32_t inst_mask);
+ int (*prepare_resume)(void *handle, uint32_t inst_mask);
+ int (*resume)(void *handle, uint32_t inst_mask);
+};
+
+struct amdgpu_xcp_ip {
+ struct amdgpu_xcp_ip_funcs *ip_funcs;
+ uint32_t inst_mask;
+
+ enum AMDGPU_XCP_IP_BLOCK ip_id;
+ bool valid;
+};
+
+struct amdgpu_xcp {
+ struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
+
+ uint8_t id;
+ uint8_t mem_id;
+ bool valid;
+ atomic_t ref_cnt;
+ struct drm_device *ddev;
+ struct drm_device *rdev;
+ struct drm_device *pdev;
+ struct drm_driver *driver;
+ struct drm_vma_offset_manager *vma_offset_manager;
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+};
+
+struct amdgpu_xcp_mgr {
+ struct amdgpu_device *adev;
+ struct mutex xcp_lock;
+ struct amdgpu_xcp_mgr_funcs *funcs;
+
+ struct amdgpu_xcp xcp[MAX_XCP];
+ uint8_t num_xcps;
+ int8_t mode;
+
+ /* Used to determine KFD memory size limits per XCP */
+ unsigned int num_xcp_per_mem_partition;
+};
+
+struct amdgpu_xcp_mgr_funcs {
+ int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
+ int *num_xcps);
+ int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
+ int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip);
+ int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id);
+
+ int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*select_scheds)(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds);
+ int (*update_partition_sched_list)(struct amdgpu_device *adev);
+};
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance);
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask);
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent);
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv);
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity);
+
+#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \
+ ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
+ (adev)->xcp_mgr->funcs->select_scheds ? \
+ (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT)
+#define amdgpu_xcp_update_partition_sched_list(adev) \
+ ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
+ (adev)->xcp_mgr->funcs->update_partition_sched_list ? \
+ (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0)
+
+static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr)
+ return 1;
+ else
+ return xcp_mgr->num_xcps;
+}
+
+static inline struct amdgpu_xcp *
+amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
+{
+ if (!xcp_mgr)
+ return NULL;
+
+ while (*from < MAX_XCP) {
+ if (xcp_mgr->xcp[*from].valid)
+ return &xcp_mgr->xcp[*from];
+ ++(*from);
+ }
+
+ return NULL;
+}
+
+#define for_each_xcp(xcp_mgr, xcp, i) \
+ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
+ xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 439925477fb8..03dc59cbe8aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -325,6 +325,36 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
}
+static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
static ssize_t amdgpu_xgmi_show_error(struct device *dev,
struct device_attribute *attr,
@@ -361,6 +391,8 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
+static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
+static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
@@ -380,6 +412,15 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
if (ret)
pr_err("failed to create xgmi_error\n");
+ /* Create xgmi num hops file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops);
+ if (ret)
+ pr_err("failed to create xgmi_num_hops\n");
+
+ /* Create xgmi num links file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (ret)
+ pr_err("failed to create xgmi_num_links\n");
/* Create sysfs link to hive info folder on the first device */
if (hive->kobj.parent != (&adev->dev->kobj)) {
@@ -407,6 +448,9 @@ remove_link:
remove_file:
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
success:
return ret;
@@ -420,6 +464,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
if (hive->kobj.parent != (&adev->dev->kobj))
sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
@@ -1014,7 +1060,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
}
/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
{
int ret = 0;
struct ta_ras_trigger_error_input *block_info =
@@ -1026,7 +1073,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *injec
if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
dev_warn(adev->dev, "Failed to disallow XGMI power down");
- ret = psp_ras_trigger_error(&adev->psp, block_info);
+ ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
if (amdgpu_ras_intr_triggered())
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 24d42d24e6a0..104a5ad8397d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -70,7 +70,6 @@ enum amd_sriov_ucode_engine_id {
AMD_SRIOV_UCODE_ID_RLC_SRLS,
AMD_SRIOV_UCODE_ID_MEC,
AMD_SRIOV_UCODE_ID_MEC2,
- AMD_SRIOV_UCODE_ID_IMU,
AMD_SRIOV_UCODE_ID_SOS,
AMD_SRIOV_UCODE_ID_ASD,
AMD_SRIOV_UCODE_ID_TA_RAS,
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
new file mode 100644
index 000000000000..16471b81a1f5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "amdgpu_xcp.h"
+#include "gfx_v9_4_3.h"
+#include "gfxhub_v1_2.h"
+#include "sdma_v4_4_2.h"
+
+#define XCP_INST_MASK(num_inst, xcp_id) \
+ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
+
+#define AMDGPU_XCP_OPS_KFD (1 << 0)
+
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;
+
+ adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;
+
+ adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
+ adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;
+
+ adev->doorbell_index.sdma_doorbell_range = 20;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->doorbell_index.sdma_engine[i] =
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
+ i * (adev->doorbell_index.sdma_doorbell_range >> 1);
+
+ adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
+}
+
+static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
+ uint32_t inst_idx, struct amdgpu_ring *ring)
+{
+ int xcp_id;
+ enum AMDGPU_XCP_IP_BLOCK ip_blk;
+ uint32_t inst_mask;
+
+ ring->xcp_id = ~0;
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return;
+
+ inst_mask = 1 << inst_idx;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ ip_blk = AMDGPU_XCP_GFX;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ ip_blk = AMDGPU_XCP_SDMA;
+ break;
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ ip_blk = AMDGPU_XCP_VCN;
+ if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ inst_mask = 1 << (inst_idx * 2);
+ break;
+ default:
+ DRM_ERROR("Not support ring type %d!", ring->funcs->type);
+ return;
+ }
+
+ for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
+ if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
+ ring->xcp_id = xcp_id;
+ break;
+ }
+ }
+}
+
+static void aqua_vanjaram_xcp_gpu_sched_update(
+ struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int sel_xcp_id)
+{
+ unsigned int *num_gpu_sched;
+
+ num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
+ .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
+ adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
+ .sched[(*num_gpu_sched)++] = &ring->sched;
+ DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name,
+ sel_xcp_id, ring->funcs->type,
+ ring->hw_prio, *num_gpu_sched);
+}
+
+static int aqua_vanjaram_xcp_sched_list_update(
+ struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
+ memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
+ }
+
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ ring = adev->rings[i];
+ if (!ring || !ring->sched.ready)
+ continue;
+
+ aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
+
+ /* VCN is shared by two partitions under CPX MODE */
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
+ }
+
+ return 0;
+}
+
+static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->num_rings; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring);
+ else
+ aqua_vanjaram_set_xcp_id(adev, ring->me, ring);
+ }
+
+ return aqua_vanjaram_xcp_sched_list_update(adev);
+}
+
+static int aqua_vanjaram_select_scheds(
+ struct amdgpu_device *adev,
+ u32 hw_ip,
+ u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds)
+{
+ u32 sel_xcp_id;
+ int i;
+
+ if (fpriv->xcp_id == ~0) {
+ u32 least_ref_cnt = ~0;
+
+ fpriv->xcp_id = 0;
+ for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+ u32 total_ref_cnt;
+
+ total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt);
+ if (total_ref_cnt < least_ref_cnt) {
+ fpriv->xcp_id = i;
+ least_ref_cnt = total_ref_cnt;
+ }
+ }
+ }
+ sel_xcp_id = fpriv->xcp_id;
+
+ if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
+ *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
+ *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
+ atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
+ DRM_DEBUG("Selected partition #%d", sel_xcp_id);
+ } else {
+ DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst)
+{
+ int8_t dev_inst;
+
+ switch (block) {
+ case GC_HWIP:
+ case SDMA0_HWIP:
+ /* Both JPEG and VCN as JPEG is only alias of VCN */
+ case VCN_HWIP:
+ dev_inst = adev->ip_map.dev_inst[block][inst];
+ break;
+ default:
+ /* For rest of the IPs, no look up required.
+ * Assume 'logical instance == physical instance' for all configs. */
+ dev_inst = inst;
+ break;
+ }
+
+ return dev_inst;
+}
+
+static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask)
+{
+ uint32_t dev_mask = 0;
+ int8_t log_inst, dev_inst;
+
+ while (mask) {
+ log_inst = ffs(mask) - 1;
+ dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst);
+ dev_mask |= (1 << dev_inst);
+ mask &= ~(1 << log_inst);
+ }
+
+ return dev_mask;
+}
+
+static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+ int l = 0, i;
+
+ while (inst_mask) {
+ i = ffs(inst_mask) - 1;
+ adev->ip_map.dev_inst[ip_block][l++] = i;
+ inst_mask &= ~(1 << i);
+ }
+ for (; l < HWIP_MAX_INSTANCE; l++)
+ adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
+void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
+{
+ u32 ip_map[][2] = {
+ { GC_HWIP, adev->gfx.xcc_mask },
+ { SDMA0_HWIP, adev->sdma.sdma_mask },
+ { VCN_HWIP, adev->vcn.inst_mask },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+ aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
+
+ adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
+ adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask;
+}
+
+/* Fixed pattern for smn addressing on different AIDs:
+ * bit[34]: indicate cross AID access
+ * bit[33:32]: indicate target AID id
+ * AID id range is 0 ~ 3 as maximum AID number is 4.
+ */
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
+{
+ u64 ext_offset;
+
+ /* local routing and bit[34:32] will be zeros */
+ if (ext_id == 0)
+ return 0;
+
+ /* Initiated from host, accessing to all non-zero aids are cross traffic */
+ ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);
+
+ return ext_offset;
+}
+
+static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ if (adev->nbio.funcs->get_compute_partition_mode)
+ mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+
+ return mode;
+}
+
+static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int num_xcc, num_xcc_per_xcp = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_xcc_per_xcp = 1;
+ break;
+ }
+
+ return num_xcc_per_xcp;
+}
+
+static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
+ int num_sdma, num_vcn;
+
+ num_sdma = adev->sdma.num_instances;
+ num_vcn = adev->vcn.num_vcn_inst;
+
+ switch (xcp_mgr->mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma;
+ num_vcn_xcp = num_vcn;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma / 2;
+ num_vcn_xcp = num_vcn / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma / 3;
+ num_vcn_xcp = num_vcn / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma / 4;
+ num_vcn_xcp = num_vcn / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_sdma_xcp = 2;
+ num_vcn_xcp = num_vcn ? 1 : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+
+ switch (ip_id) {
+ case AMDGPU_XCP_GFXHUB:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_GFX:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
+ break;
+ case AMDGPU_XCP_SDMA:
+ ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
+ ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_VCN:
+ ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
+ /* TODO : Assign IP funcs */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ip->ip_id = ip_id;
+
+ return 0;
+}
+
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ if (adev->gmc.num_mem_partitions == 1)
+ return AMDGPU_SPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc / 2)
+ return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
+ AMDGPU_QPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
+ return AMDGPU_DPX_PARTITION_MODE;
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum amdgpu_gfx_partition mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xccs_per_xcp;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
+ case AMDGPU_DPX_PARTITION_MODE:
+ return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
+ case AMDGPU_TPX_PARTITION_MODE:
+ return (adev->gmc.num_mem_partitions == 1 ||
+ adev->gmc.num_mem_partitions == 3) &&
+ ((num_xcc % 3) == 0);
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xccs_per_xcp = num_xcc / 4;
+ return (adev->gmc.num_mem_partitions == 1 ||
+ adev->gmc.num_mem_partitions == 4) &&
+ (num_xccs_per_xcp >= 2);
+ case AMDGPU_CPX_PARTITION_MODE:
+ return ((num_xcc > 1) &&
+ (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) &&
+ (num_xcc % adev->gmc.num_mem_partitions) == 0);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ /* TODO:
+ * Stop user queues and threads, and make sure GPU is empty of work.
+ */
+
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
+
+ return 0;
+}
+
+static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ amdgpu_amdkfd_device_probe(xcp_mgr->adev);
+ amdgpu_amdkfd_device_init(xcp_mgr->adev);
+ /* If KFD init failed, return failure */
+ if (!xcp_mgr->adev->kfd.init_complete)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode, int *num_xcps)
+{
+ int num_xcc_per_xcp, num_xcc, ret;
+ struct amdgpu_device *adev;
+ u32 flags = 0;
+
+ adev = xcp_mgr->adev;
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
+ mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+ } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
+ dev_err(adev->dev,
+ "Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
+ amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
+
+ if (adev->kfd.init_complete)
+ flags |= AMDGPU_XCP_OPS_KFD;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+ if (ret)
+ goto out;
+ }
+
+ ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags);
+ if (ret)
+ goto unlock;
+
+ num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
+ if (adev->gfx.funcs->switch_partition_mode)
+ adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
+ num_xcc_per_xcp);
+
+ /* Init info about new xcps */
+ *num_xcps = num_xcc / num_xcc_per_xcp;
+ amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+ ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
+unlock:
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_unlock_kfd(adev);
+out:
+ return ret;
+}
+
+static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
+ int xcc_id, uint8_t *mem_id)
+{
+ /* memory/spatial modes validation check is already done */
+ *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
+ *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+ struct amdgpu_numa_info numa_info;
+ struct amdgpu_device *adev;
+ uint32_t xcc_mask;
+ int r, i, xcc_id;
+
+ adev = xcp_mgr->adev;
+ /* TODO: BIOS is not returning the right info now
+ * Check on this later
+ */
+ /*
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ */
+ if (adev->gmc.num_mem_partitions == 1) {
+ /* Only one range */
+ *mem_id = 0;
+ return 0;
+ }
+
+ r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+ if (r || !xcc_mask)
+ return -EINVAL;
+
+ xcc_id = ffs(xcc_mask) - 1;
+ if (!adev->gmc.is_app_apu)
+ return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+ r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+ if (r)
+ return r;
+
+ r = -EINVAL;
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+ *mem_id = i;
+ r = 0;
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ if (!ip)
+ return -EINVAL;
+
+ return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
+}
+
+struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
+ .switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
+ .query_partition_mode = &aqua_vanjaram_query_partition_mode,
+ .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
+ .select_scheds = &aqua_vanjaram_select_scheds,
+ .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list
+};
+
+static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
+ &aqua_vanjaram_xcp_funcs);
+ if (ret)
+ return ret;
+
+ /* TODO: Default memory node affinity init */
+
+ return ret;
+}
+
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
+{
+ u32 mask, inst_mask = adev->sdma.sdma_mask;
+ int ret, i;
+
+ /* generally 1 AID supports 4 instances */
+ adev->sdma.num_inst_per_aid = 4;
+ adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);
+
+ adev->aid_mask = i = 1;
+ inst_mask >>= adev->sdma.num_inst_per_aid;
+
+ for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
+ inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
+ if ((inst_mask & mask) == mask)
+ adev->aid_mask |= (1 << i);
+ }
+
+ /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
+ * addressed based on logical instance ids.
+ */
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_inst_per_aid = 1;
+ adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
+ adev->jpeg.harvest_config = 0;
+ adev->jpeg.num_inst_per_aid = 1;
+ adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);
+
+ ret = aqua_vanjaram_xcp_mgr_init(adev);
+ if (ret)
+ return ret;
+
+ aqua_vanjaram_ip_map_init(adev);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index de6d10390ab2..5641cf05d856 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 67d16236b216..52598fbc9b39 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -489,8 +489,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
cik_sdma_enable(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ab44c1391d52..be984f8c71c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3490,7 +3490,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
+ u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
@@ -3568,7 +3568,7 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
- if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
return;
}
@@ -3636,7 +3636,7 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs;
}
static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
@@ -4219,7 +4219,7 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -4291,7 +4291,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
* field when performing a select_se_sh so it should be
@@ -4318,7 +4318,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4329,7 +4329,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
dst);
}
-static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -4340,7 +4340,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
nv_grbm_select(adev, me, pipe, q, vm);
}
@@ -4461,7 +4461,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
else
ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
@@ -4490,7 +4490,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX10_MEC_HPD_SIZE);
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
@@ -4550,7 +4550,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
- &adev->gfx.kiq.irq);
+ &adev->gfx.kiq[0].irq);
if (r)
return r;
@@ -4614,8 +4614,8 @@ static int gfx_v10_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
- j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v10_0_compute_ring_init(adev, ring_id,
@@ -4629,19 +4629,19 @@ static int gfx_v10_0_sw_init(void *handle)
}
if (!adev->enable_mes_kiq) {
- r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
if (r)
return r;
}
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@@ -4690,11 +4690,11 @@ static int gfx_v10_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
if (!adev->enable_mes_kiq) {
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
}
gfx_v10_0_pfp_fini(adev);
@@ -4712,7 +4712,7 @@ static int gfx_v10_0_sw_fini(void *handle)
}
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -4772,13 +4772,13 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
(adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v10_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
@@ -4825,6 +4825,29 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@@ -4856,6 +4879,9 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
}
+
+ gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+ AMDGPU_NUM_VMID);
}
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
@@ -4907,7 +4933,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
/*
* Set corresponding TCP bits for the inactive WGPs in
@@ -4940,7 +4966,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -4978,7 +5004,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
nv_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -6073,7 +6099,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
- u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
@@ -6168,11 +6193,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v10_0_cp_gfx_start(adev);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-
return 0;
}
@@ -6214,7 +6234,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
break;
}
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -6423,55 +6443,6 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
return 0;
}
-#ifdef BRING_UP_DEBUG
-static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v10_gfx_mqd *mqd = ring->mqd_ptr;
-
- /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
-
- /* set GFX_MQD_BASE */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set GFX_MQD_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
-
- /* set GFX_HQD_VMID to 0 */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
-
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
- mqd->cp_gfx_hqd_queue_priority);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
-
- /* set GFX_HQD_BASE, similar as CP_RB_BASE */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
-
- /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
-
- /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
-
- /* set RB_WPTR_POLL_ADDR */
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
-
- /* set RB_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
-
- /* active the queue */
- WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
-
- return 0;
-}
-#endif
-
static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -6492,59 +6463,23 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
-#ifdef BRING_UP_DEBUG
- gfx_v10_0_gfx_queue_init_register(ring);
-#endif
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) {
- /* reset mqd with the backup copy */
+ } else {
+ /* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
amdgpu_ring_clear_ring(ring);
-#ifdef BRING_UP_DEBUG
- mutex_lock(&adev->srbm_mutex);
- nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v10_0_gfx_queue_init_register(ring);
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-#endif
- } else {
- amdgpu_ring_clear_ring(ring);
}
return 0;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r, i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
- adev->gfx.num_gfx_rings);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
@@ -6555,7 +6490,7 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- goto done;
+ return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
@@ -6565,23 +6500,14 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
}
amdgpu_bo_unreserve(ring->mqd_obj);
if (r)
- goto done;
+ return r;
}
-#ifndef BRING_UP_DEBUG
- r = gfx_v10_0_kiq_enable_kgq(adev);
- if (r)
- goto done;
-#endif
- r = gfx_v10_0_cp_gfx_start(adev);
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-done:
- return r;
+ return gfx_v10_0_cp_gfx_start(adev);
}
static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
@@ -6812,14 +6738,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -6841,8 +6766,8 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
@@ -6864,17 +6789,14 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
- } else {
- amdgpu_ring_clear_ring(ring);
}
return 0;
@@ -6885,7 +6807,7 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
int r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
@@ -6901,7 +6823,6 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
return 0;
}
@@ -6929,7 +6850,7 @@ static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
goto done;
}
- r = amdgpu_gfx_enable_kcq(adev);
+ r = amdgpu_gfx_enable_kcq(adev, 0);
done:
return r;
}
@@ -7242,47 +7163,20 @@ static int gfx_v10_0_hw_init(void *handle)
return r;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
- return -EINVAL;
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_gfx_rings))
- return -ENOMEM;
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
- PREEMPT_QUEUES, 0, 0);
- if (!adev->job_hang)
- return amdgpu_ring_test_helper(kiq_ring);
- else
- return 0;
-}
-#endif
-
static int gfx_v10_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
if (!adev->no_hw_access) {
-#ifndef BRING_UP_DEBUG
if (amdgpu_async_gfx_ring) {
- r = gfx_v10_0_kiq_disable_kgq(adev);
- if (r)
+ if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
-#endif
- if (amdgpu_gfx_disable_kcq(adev))
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
}
@@ -7574,7 +7468,7 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}
-static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -7615,7 +7509,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
@@ -7962,7 +7856,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* enable FGCG firstly*/
@@ -8001,7 +7895,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
AMD_CG_SUPPORT_GFX_3D_CGLS))
gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -8095,11 +7989,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
gfx_v10_cntl_power_gating(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
@@ -8648,7 +8542,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
@@ -9156,7 +9050,7 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
uint32_t tmp, target;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
if (ring->me == 1)
target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
@@ -9200,7 +9094,7 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -9377,7 +9271,7 @@ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
@@ -9411,8 +9305,8 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
- adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+ adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs;
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
@@ -9549,7 +9443,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
@@ -9570,7 +9464,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index c4940b6ea1c4..690e121d9dda 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -112,7 +112,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance);
+ u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
@@ -123,8 +123,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel);
-static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
-static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
bool enable);
@@ -192,7 +192,7 @@ static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
- if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
return;
}
@@ -260,7 +260,7 @@ static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}
static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
@@ -463,6 +463,23 @@ out:
return err;
}
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 1505) &&
+ (adev->gfx.pfp_fw_version >= 1600) &&
+ (adev->gfx.mec_fw_version >= 512))
+ adev->gfx.cp_gfx_shadow = true;
+ break;
+ default:
+ adev->gfx.cp_gfx_shadow = false;
+ break;
+ }
+}
+
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[40];
@@ -539,6 +556,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
+ gfx_v11_0_check_fw_cp_gfx_shadow(adev);
out:
if (err) {
amdgpu_ucode_release(&adev->gfx.pfp_fw);
@@ -699,7 +717,7 @@ static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -747,7 +765,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
-static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
* field when performing a select_se_sh so it should be
@@ -773,7 +791,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -784,7 +802,7 @@ static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
dst);
}
-static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -795,11 +813,32 @@ static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
soc21_grbm_select(adev, me, pipe, q, vm);
}
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ if (adev->gfx.cp_gfx_shadow) {
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ return 0;
+ } else {
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -ENOTSUPP;
+ }
+}
+
static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v11_0_select_se_sh,
@@ -808,6 +847,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
};
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
@@ -866,7 +906,7 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
else
ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
@@ -897,7 +937,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX11_MEC_HPD_SIZE);
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
@@ -1367,8 +1407,8 @@ static int gfx_v11_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
- j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v11_0_compute_ring_init(adev, ring_id,
@@ -1382,19 +1422,19 @@ static int gfx_v11_0_sw_init(void *handle)
}
if (!adev->enable_mes_kiq) {
- r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
+ r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
if (r)
return r;
}
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
if (r)
return r;
@@ -1456,11 +1496,11 @@ static int gfx_v11_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
if (!adev->enable_mes_kiq) {
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
}
gfx_v11_0_pfp_fini(adev);
@@ -1477,7 +1517,7 @@ static int gfx_v11_0_sw_fini(void *handle)
}
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -1598,6 +1638,7 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
/* Enable trap for each kfd vmid. */
data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
}
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -1667,7 +1708,7 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
soc21_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -3188,7 +3229,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
- u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
@@ -3280,11 +3320,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v11_0_cp_gfx_start(adev);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-
return 0;
}
@@ -3330,8 +3365,6 @@ static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
}
- adev->gfx.kiq.ring.sched.ready = enable;
-
udelay(50);
}
@@ -3633,55 +3666,6 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
return 0;
}
-#ifdef BRING_UP_DEBUG
-static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- struct v11_gfx_mqd *mqd = ring->mqd_ptr;
-
- /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
-
- /* set GFX_MQD_BASE */
- WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
- WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set GFX_MQD_CONTROL */
- WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
-
- /* set GFX_HQD_VMID to 0 */
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
-
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
- mqd->cp_gfx_hqd_queue_priority);
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
-
- /* set GFX_HQD_BASE, similar as CP_RB_BASE */
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
-
- /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
-
- /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
-
- /* set RB_WPTR_POLL_ADDR */
- WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
-
- /* set RB_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
-
- /* active the queue */
- WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
-
- return 0;
-}
-#endif
-
static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -3693,59 +3677,23 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
amdgpu_ring_init_mqd(ring);
-#ifdef BRING_UP_DEBUG
- gfx_v11_0_gfx_queue_init_register(ring);
-#endif
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) {
- /* reset mqd with the backup copy */
+ } else {
+ /* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
amdgpu_ring_clear_ring(ring);
-#ifdef BRING_UP_DEBUG
- mutex_lock(&adev->srbm_mutex);
- soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- gfx_v11_0_gfx_queue_init_register(ring);
- soc21_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-#endif
- } else {
- amdgpu_ring_clear_ring(ring);
}
return 0;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- int r, i;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
- adev->gfx.num_gfx_rings);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-#endif
-
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
@@ -3756,7 +3704,7 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- goto done;
+ return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
@@ -3766,23 +3714,14 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
}
amdgpu_bo_unreserve(ring->mqd_obj);
if (r)
- goto done;
+ return r;
}
-#ifndef BRING_UP_DEBUG
- r = gfx_v11_0_kiq_enable_kgq(adev);
- if (r)
- goto done;
-#endif
- r = gfx_v11_0_cp_gfx_start(adev);
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-done:
- return r;
+ return gfx_v11_0_cp_gfx_start(adev);
}
static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
@@ -4028,14 +3967,13 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v11_compute_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v11_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -4057,8 +3995,8 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
@@ -4080,17 +4018,14 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
-
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
- } else {
- amdgpu_ring_clear_ring(ring);
}
return 0;
@@ -4101,7 +4036,7 @@ static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
int r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
@@ -4146,7 +4081,7 @@ static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
goto done;
}
- r = amdgpu_gfx_enable_kcq(adev);
+ r = amdgpu_gfx_enable_kcq(adev, 0);
done:
return r;
}
@@ -4239,7 +4174,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
false : true;
adev->gfxhub.funcs->set_fault_enable_default(adev, value);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
return 0;
}
@@ -4407,48 +4342,20 @@ static int gfx_v11_0_hw_init(void *handle)
return r;
}
-#ifndef BRING_UP_DEBUG
-static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i, r = 0;
-
- if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
- return -EINVAL;
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_gfx_rings))
- return -ENOMEM;
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
- PREEMPT_QUEUES, 0, 0);
-
- if (adev->gfx.kiq.ring.sched.ready)
- r = amdgpu_ring_test_helper(kiq_ring);
-
- return r;
-}
-#endif
-
static int gfx_v11_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
if (!adev->no_hw_access) {
-#ifndef BRING_UP_DEBUG
if (amdgpu_async_gfx_ring) {
- r = gfx_v11_0_kiq_disable_kgq(adev);
- if (r)
+ if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
-#endif
- if (amdgpu_gfx_disable_kcq(adev))
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
amdgpu_mes_kiq_hw_fini(adev);
@@ -4525,7 +4432,7 @@ static int gfx_v11_0_soft_reset(void *handle)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- gfx_v11_0_set_safe_mode(adev);
+ gfx_v11_0_set_safe_mode(adev, 0);
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
@@ -4625,7 +4532,7 @@ static int gfx_v11_0_soft_reset(void *handle)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- gfx_v11_0_unset_safe_mode(adev);
+ gfx_v11_0_unset_safe_mode(adev, 0);
return gfx_v11_0_cp_resume(adev);
}
@@ -4794,7 +4701,7 @@ static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}
-static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -4813,7 +4720,7 @@ static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
}
@@ -5041,7 +4948,7 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
@@ -5061,7 +4968,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
AMD_CG_SUPPORT_GFX_3D_CGLS))
gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5129,11 +5036,11 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
gfx_v11_cntl_power_gating(adev, enable);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v11_0_set_powergating_state(void *handle,
@@ -5592,6 +5499,29 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+ u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow,
+ int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (!adev->gfx.cp_gfx_shadow)
+ return;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+ amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+ amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+ amdgpu_ring_write(ring, lower_32_bits(gds_va));
+ amdgpu_ring_write(ring, upper_32_bits(gds_va));
+ amdgpu_ring_write(ring, lower_32_bits(csa_va));
+ amdgpu_ring_write(ring, upper_32_bits(csa_va));
+ amdgpu_ring_write(ring, shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+ amdgpu_ring_write(ring, init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+}
+
static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
unsigned ret;
@@ -5623,7 +5553,7 @@ static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
@@ -6091,7 +6021,7 @@ static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
uint32_t tmp, target;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
target += ring->pipe;
@@ -6182,6 +6112,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
+ 9 + /* SET_Q_PREEMPTION_MODE */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
@@ -6208,6 +6139,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+ .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v11_0_ring_preempt_ib,
@@ -6288,7 +6220,7 @@ static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
@@ -6437,7 +6369,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
counter = 0;
- gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 8 && j < 2)
gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
@@ -6469,7 +6401,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
active_cu_number += counter;
}
}
- gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
index 068b9586a223..26d6286d86c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -84,8 +84,20 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
/* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */
if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
(entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
- !entry->vmid && !entry->pasid)
+ !entry->vmid && !entry->pasid) {
+ uint32_t rlc_status0 = 0;
+
+ rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
+
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+
amdgpu_ras_reset_gpu(adev);
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index c41219e23151..da6caff78c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1285,7 +1285,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
- u32 sh_num, u32 instance)
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -1438,12 +1438,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on SI */
- gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
}
/* GRBM_GFX_INDEX has a different offset on SI */
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
@@ -1459,14 +1459,14 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v6_0_get_rb_active_bitmap(adev);
active_rbs |= data <<
((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1487,7 +1487,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1496,7 +1496,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG);
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1535,7 +1535,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
active_cu = gfx_v6_0_get_cu_enabled(adev);
@@ -1550,7 +1550,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
}
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -2391,7 +2391,7 @@ static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
if (!enable) {
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmSPI_LB_CU_MASK, 0x00ff);
}
}
@@ -2968,7 +2968,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -2993,7 +2993,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -3003,7 +3003,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
DRM_INFO("Not implemented\n");
}
@@ -3028,6 +3028,7 @@ static int gfx_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
GFX6_NUM_COMPUTE_RINGS);
@@ -3073,7 +3074,7 @@ static int gfx_v6_0_sw_init(void *handle)
ring = &adev->gfx.gfx_ring[i];
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
- r = amdgpu_ring_init(adev, ring, 1024,
+ r = amdgpu_ring_init(adev, ring, 2048,
&adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -3571,7 +3572,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v6_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
@@ -3593,7 +3594,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
}
}
- gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 9d5c1e29b4a3..8c174c11eaee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1548,11 +1548,12 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
* @sh_num: sh block to address
* @instance: Certain registers are instanced per SE or SH.
* 0xffffffff means broadcast to all SEs or SHs (CIK).
- *
+ * @xcc_id: xcc accelerated compute core id
* Select which SE, SH combinations to address.
*/
static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
- u32 se_num, u32 sh_num, u32 instance)
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
{
u32 data;
@@ -1732,13 +1733,13 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on CI+ */
- gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
}
/* GRBM_GFX_INDEX has a different offset on CI+ */
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
/**
@@ -1761,13 +1762,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v7_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1790,7 +1791,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1801,7 +1802,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG_1);
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1911,7 +1912,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2728,7 +2729,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -3301,7 +3302,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3309,7 +3310,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3361,7 +3362,7 @@ static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
u32 tmp, i, mask;
@@ -3383,7 +3384,7 @@ static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
u32 tmp;
@@ -3474,7 +3475,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(mmRLC_LB_PARAMS, 0x00600408);
WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3530,7 +3531,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3584,7 +3585,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3635,7 +3636,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -4111,7 +4112,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -4136,7 +4137,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -4146,7 +4147,7 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
cik_srbm_select(adev, me, pipe, q, vm);
}
@@ -4178,6 +4179,7 @@ static int gfx_v7_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -4456,7 +4458,8 @@ static int gfx_v7_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v7_0_compute_ring_init(adev,
@@ -5114,7 +5117,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
@@ -5135,7 +5138,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index b1f2684d854a..51c1745c8369 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1304,7 +1304,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
u32 *hpd;
size_t mec_hpd_size;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -2001,7 +2001,8 @@ static int gfx_v8_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v8_0_compute_ring_init(adev,
@@ -2015,19 +2016,19 @@ static int gfx_v8_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
+ r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
if (r)
return r;
/* create MQD for all compute queues as well as KIQ for SRIOV case */
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
if (r)
return r;
@@ -2050,9 +2051,9 @@ static int gfx_v8_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
gfx_v8_0_mec_fini(adev);
amdgpu_gfx_rlc_fini(adev);
@@ -3394,7 +3395,8 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
- u32 se_num, u32 sh_num, u32 instance)
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
{
u32 data;
@@ -3417,7 +3419,7 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
}
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
vi_srbm_select(adev, me, pipe, q, vm);
}
@@ -3578,13 +3580,13 @@ gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
}
/* GRBM_GFX_INDEX has a different offset on VI */
- gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
}
/* GRBM_GFX_INDEX has a different offset on VI */
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
@@ -3600,13 +3602,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -3629,7 +3631,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
/* cache the values for userspace */
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
adev->gfx.config.rb_config[i][j].rb_backend_disable =
RREG32(mmCC_RB_BACKEND_DISABLE);
adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -3640,7 +3642,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
RREG32(mmPA_SC_RASTER_CONFIG_1);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -3787,7 +3789,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3818,7 +3820,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3826,7 +3828,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
if (k == adev->usec_timeout) {
gfx_v8_0_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -3834,7 +3836,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -4281,7 +4283,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
amdgpu_ring_clear_ring(ring);
gfx_v8_0_cp_gfx_start(adev);
- ring->sched.ready = true;
return 0;
}
@@ -4292,7 +4293,7 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32(mmCP_MEC_CNTL, 0);
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -4314,12 +4315,12 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
uint64_t queue_mask = 0;
int r, i;
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
@@ -4595,14 +4596,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct vi_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v8_0_kiq_setting(ring);
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
@@ -4625,8 +4625,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
}
return 0;
@@ -4650,15 +4650,13 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
- } else {
- amdgpu_ring_clear_ring(ring);
}
return 0;
}
@@ -4678,21 +4676,22 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
int r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (unlikely(r != 0))
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
return r;
+ }
gfx_v8_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
return 0;
}
@@ -4741,7 +4740,7 @@ static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
if (r)
return r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
@@ -4808,7 +4807,7 @@ static int gfx_v8_0_hw_init(void *handle)
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
if (r)
@@ -4902,7 +4901,7 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (!gfx_v8_0_wait_for_idle(adev))
gfx_v8_0_cp_enable(adev, false);
else
@@ -4911,7 +4910,7 @@ static int gfx_v8_0_hw_fini(void *handle)
adev->gfx.rlc.funcs->stop(adev);
else
pr_err("rlc is busy, skip halt rlc\n");
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5216,7 +5215,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32(mmSQ_IND_DATA);
}
-static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 0 wave data */
dst[(*no_fields)++] = 0;
@@ -5241,7 +5240,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -5263,6 +5262,7 @@ static int gfx_v8_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -5376,7 +5376,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -5430,7 +5430,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5481,7 +5481,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
{
uint32_t data;
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5535,7 +5535,7 @@ static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -5562,7 +5562,7 @@ static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -5621,7 +5621,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
@@ -5717,7 +5717,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
gfx_v8_0_wait_for_rlc_serdes(adev);
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5727,7 +5727,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
@@ -5810,7 +5810,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
gfx_v8_0_wait_for_rlc_serdes(adev);
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
@@ -6723,11 +6723,11 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
*/
if (from_wq) {
mutex_lock(&adev->grbm_idx_mutex);
- gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+ gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -7001,7 +7001,7 @@ static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
@@ -7116,7 +7116,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
if (i < 4 && j < 2)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
@@ -7137,7 +7137,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a674c8a58dc2..65577eca58f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -760,7 +760,7 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if);
+ void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
@@ -888,7 +888,7 @@ static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
- adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+ adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
@@ -1494,7 +1494,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
mask = 1;
cu_bitmap = 0;
counter = 0;
- amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (cu_info->bitmap[i][j] & mask) {
@@ -1513,7 +1513,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
}
}
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
}
@@ -1535,7 +1535,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1584,7 +1584,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1703,7 +1703,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
const struct gfx_firmware_header_v1_0 *mec_hdr;
- bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
@@ -1778,7 +1778,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
-static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
/* type 1 wave data */
dst[(*no_fields)++] = 1;
@@ -1799,7 +1799,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
-static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
@@ -1808,7 +1808,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
-static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
@@ -1819,9 +1819,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
}
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
- soc15_grbm_select(adev, me, pipe, q, vm);
+ soc15_grbm_select(adev, me, pipe, q, vm, 0);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
@@ -1995,7 +1995,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX9_MEC_HPD_SIZE);
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
@@ -2095,7 +2095,7 @@ static int gfx_v9_0_sw_init(void *handle)
/* disable scheduler on the real ring */
ring->no_scheduler = true;
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -2113,7 +2113,7 @@ static int gfx_v9_0_sw_init(void *handle)
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
ring->is_sw_ring = true;
hw_prio = amdgpu_sw_ring_priority(i);
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
NULL);
@@ -2144,7 +2144,8 @@ static int gfx_v9_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
continue;
r = gfx_v9_0_compute_ring_init(adev,
@@ -2158,19 +2159,19 @@ static int gfx_v9_0_sw_init(void *handle)
}
}
- r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq;
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
if (r)
return r;
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
- r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
if (r)
return r;
@@ -2205,9 +2206,9 @@ static int gfx_v9_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
- amdgpu_gfx_kiq_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
gfx_v9_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
@@ -2230,7 +2231,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
- u32 instance)
+ u32 instance, int xcc_id)
{
u32 data;
@@ -2279,19 +2280,42 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
data = gfx_v9_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
}
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
#define DEFAULT_SH_MEM_BASES (0x6000)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
@@ -2313,12 +2337,12 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_lock(&adev->srbm_mutex);
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
- soc15_grbm_select(adev, 0, 0, 0, i);
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
/* CP and shaders */
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
@@ -2356,8 +2380,8 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 4, 1):
tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
- tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
- DISABLE_BARRIER_WAITCNT, 1);
+ tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !READ_ONCE(adev->barrier_has_auto_waitcnt));
WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
break;
default:
@@ -2382,8 +2406,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
- soc15_grbm_select(adev, 0, 0, 0, i);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
/* CP and shaders */
if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
@@ -2405,7 +2429,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
}
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -2422,7 +2446,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -2430,7 +2454,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
if (k == adev->usec_timeout) {
amdgpu_gfx_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -2438,7 +2462,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3133,7 +3157,6 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v9_0_cp_gfx_start(adev);
- ring->sched.ready = true;
return 0;
}
@@ -3145,7 +3168,7 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
}
@@ -3509,7 +3532,6 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
- int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
struct v9_mqd *tmp_mqd;
gfx_v9_0_kiq_setting(ring);
@@ -3519,20 +3541,20 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
* driver need to re-init the mqd.
* check mqd->cp_hqd_pq_control since this value should not be 0
*/
- tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
/* for GPU_RESET case , reset MQD to a clean status */
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_kiq_init_register(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
@@ -3541,14 +3563,14 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_sriov_vf(adev) && adev->in_suspend)
amdgpu_ring_clear_ring(ring);
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_mqd_init(ring);
gfx_v9_0_kiq_init_register(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
}
return 0;
@@ -3572,24 +3594,21 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
gfx_v9_0_mqd_init(ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
- } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
- /* reset MQD to a clean status */
+ } else {
+ /* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
-
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
- } else {
- amdgpu_ring_clear_ring(ring);
}
return 0;
@@ -3600,7 +3619,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
int r;
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
@@ -3616,7 +3635,6 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
return 0;
}
@@ -3644,7 +3662,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
goto done;
}
- r = amdgpu_gfx_enable_kcq(adev);
+ r = amdgpu_gfx_enable_kcq(adev, 0);
done:
return r;
}
@@ -3764,7 +3782,7 @@ static int gfx_v9_0_hw_fini(void *handle)
/* DF freeze and kcq disable will fail */
if (!amdgpu_ras_intr_triggered())
/* disable KCQ to avoid CPC touch memory not valid anymore */
- amdgpu_gfx_disable_kcq(adev);
+ amdgpu_gfx_disable_kcq(adev, 0);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3782,11 +3800,11 @@ static int gfx_v9_0_hw_fini(void *handle)
*/
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
mutex_lock(&adev->srbm_mutex);
- soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
- adev->gfx.kiq.ring.pipe,
- adev->gfx.kiq.ring.queue, 0);
- gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
+ adev->gfx.kiq[0].ring.pipe,
+ adev->gfx.kiq[0].ring.queue, 0, 0);
+ gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
@@ -3906,7 +3924,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
unsigned long flags;
uint32_t seq, reg_val_offs = 0;
uint64_t value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg);
@@ -4506,6 +4524,7 @@ static int gfx_v9_0_early_init(void *handle)
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ adev->gfx.xcc_mask = 1;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
gfx_v9_0_set_kiq_pm4_funcs(adev);
@@ -4571,6 +4590,13 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ gfx_v9_4_2_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+ else
+ gfx_v9_0_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+
return 0;
}
@@ -4586,7 +4612,7 @@ static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
return true;
}
-static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
@@ -4603,7 +4629,7 @@ static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
}
}
-static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
@@ -4614,7 +4640,7 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -4626,7 +4652,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -4653,7 +4679,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t data, def;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
@@ -4720,7 +4746,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
}
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
@@ -4731,7 +4757,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (!adev->gfx.num_gfx_rings)
return;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
/* Enable 3D CGCG/CGLS */
if (enable) {
@@ -4775,7 +4801,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -4783,7 +4809,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
@@ -4827,7 +4853,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -5425,7 +5451,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
@@ -6384,7 +6410,7 @@ static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
};
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
+ void *inject_if, uint32_t instance_mask)
{
struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret;
@@ -6423,7 +6449,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
block_info.value = info->value;
mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
mutex_unlock(&adev->grbm_idx_mutex);
return ret;
@@ -6651,7 +6677,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
- amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
+ amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
}
}
@@ -6713,7 +6739,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
- amdgpu_gfx_select_se_sh(adev, j, 0, k);
+ amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
if (reg_value)
@@ -6728,7 +6754,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
err_data->ce_count += sec_count;
err_data->ue_count += ded_count;
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
gfx_v9_0_query_utc_edc_status(adev, err_data);
@@ -7010,7 +7036,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
+ adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
@@ -7191,7 +7217,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
mask = 1;
ao_bitmap = 0;
counter = 0;
- amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
gfx_v9_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
@@ -7224,7 +7250,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
}
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
index dfe8d4841f58..f9f6edc5e558 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -27,6 +27,6 @@
extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
- u32 instance);
+ u32 instance, int xcc_id);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index c67e387a97f5..bc8416afb62c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -970,29 +970,6 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
}
-static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
-{
- struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
- int ret;
- struct ta_ras_trigger_error_input block_info = { 0 };
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
-
- block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
- block_info.sub_block_index = info->head.sub_block_index;
- block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
- block_info.address = info->address;
- block_info.value = info->value;
-
- mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return ret;
-}
-
static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs =
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
@@ -1030,7 +1007,6 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
const struct amdgpu_ras_block_hw_ops gfx_v9_4_ras_ops = {
- .ras_error_inject = &gfx_v9_4_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
.query_ras_error_status = &gfx_v9_4_query_ras_error_status,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 3a797424579c..63f6843a069e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -761,7 +761,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
for (i = first_vmid; i < last_vmid; i++) {
data = 0;
- soc15_grbm_select(adev, 0, 0, 0, i);
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE,
@@ -769,15 +769,18 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data);
}
- soc15_grbm_select(adev, 0, 0, 0, 0);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA1), 0);
}
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev)
{
u32 tmp;
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
tmp = 0;
tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1);
@@ -1699,28 +1702,6 @@ static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev)
gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL);
}
-static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
-{
- struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
- int ret;
- struct ta_ras_trigger_error_input block_info = { 0 };
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- return -EINVAL;
-
- block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
- block_info.sub_block_index = info->head.sub_block_index;
- block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
- block_info.address = info->address;
- block_info.value = info->value;
-
- mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return ret;
-}
-
static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
{
uint32_t i, j;
@@ -1935,7 +1916,7 @@ static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
u32 status = 0;
struct amdgpu_vmhub *hub;
- hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
status = RREG32(hub->vm_l2_pro_fault_status);
/* reset page fault status */
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
@@ -1944,7 +1925,6 @@ static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
}
struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
- .ras_error_inject = &gfx_v9_4_2_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
.query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 5f8500577c02..c1ee54d4c3d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -25,35 +25,504 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "vega10_enum.h"
+#include "v9_structs.h"
+
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
#include "gc/gc_9_4_3_offset.h"
#include "gc/gc_9_4_3_sh_mask.h"
#include "gfx_v9_4_3.h"
+#include "amdgpu_xcp.h"
+
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras;
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+
+static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_4_3_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs;
+}
+
+static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
+{
+ int i, num_xcc, dev_inst;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ dev_inst = GET_INST(GC, i);
+ if (dev_inst >= 2)
+ WREG32_SOC15(GC, dev_inst, regGRBM_MCM_ADDR, 0x4);
+
+ /* Golden settings applied by driver for ASIC with rev_id 0 */
+ if (adev->rev_id == 0) {
+ WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
+ GOLDEN_GB_ADDR_CONFIG);
+
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
+ REDUCE_FIFO_DEPTH_BY_2, 2);
+ }
+ }
+}
+
+static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) |
+ (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
+{
+ uint32_t scratch_reg0_offset, xcc_offset;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ /* Use register offset which is local to XCC in the packet */
+ xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
+ WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch_reg0_offset);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err1;
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err2:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/* This value might differs per partition */
static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
- WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
- clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) |
- ((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
mutex_unlock(&adev->gfx.gpu_clock_mutex);
amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
-static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
- u32 se_num,
- u32 sh_num,
- u32 instance)
+static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ char fw_name[30];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
+ return err;
+}
+
+static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev)
+{
+ return true;
+}
+
+static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
+{
+ if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+}
+
+static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ char fw_name[30];
+ int err;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+ adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+ adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
+
+ gfx_v9_4_3_check_if_need_gfxoff(adev);
+
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ return err;
+}
+
+static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ int r;
+
+ chip_name = "gc_9_4_3";
+
+ r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
+{
+ int r, i, num_xcc;
+ u32 *hpd;
+ const __le32 *fw_data;
+ unsigned fw_size;
+ u32 *fw;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
+ AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size =
+ adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_4_3_mec_fini(adev);
+ return r;
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ for (i = 0; i < mec_hpd_size / 4; i++) {
+ memset((void *)(hpd + i), 0, 4);
+ if (i % 50 == 0)
+ msleep(1);
+ }
+ } else {
+ memset(hpd, 0, mec_hpd_size);
+ }
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+ gfx_v9_4_3_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
{
u32 data;
@@ -76,24 +545,24 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
else
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
- WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
+ WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
}
-static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address)
{
- WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
(address << SQ_IND_INDEX__INDEX__SHIFT) |
(SQ_IND_INDEX__FORCE_READ_MASK));
- return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+ return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
}
-static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t regno, uint32_t num, uint32_t *out)
{
- WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
(regno << SQ_IND_INDEX__INDEX__SHIFT) |
@@ -101,53 +570,481 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
(SQ_IND_INDEX__FORCE_READ_MASK) |
(SQ_IND_INDEX__AUTO_INCR_MASK));
while (num--)
- *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+ *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
}
static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
- uint32_t simd, uint32_t wave,
+ uint32_t xcc_id, uint32_t simd, uint32_t wave,
uint32_t *dst, int *no_fields)
{
/* type 1 wave data */
dst[(*no_fields)++] = 1;
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
- dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
-}
-
-static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t start,
uint32_t size, uint32_t *dst)
{
- wave_read_regs(adev, simd, wave, 0,
+ wave_read_regs(adev, xcc_id, simd, wave, 0,
start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
-static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t start, uint32_t size,
uint32_t *dst)
{
- wave_read_regs(adev, simd, wave, thread,
+ wave_read_regs(adev, xcc_id, simd, wave, thread,
start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}
static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q, u32 vm)
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
+}
+
+
+static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
+ int num_xccs_per_xcp)
+{
+ int ret;
+
+ ret = psp_spatial_partition(&adev->psp, NUM_XCC(adev->gfx.xcc_mask) /
+ num_xccs_per_xcp);
+ if (ret)
+ return ret;
+
+ adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
+
+ return ret;
+}
+
+static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
+{
+ int xcc;
+
+ xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0));
+ if (!xcc) {
+ dev_err(adev->dev, "Couldn't find xcc mapping from IH node");
+ return -EINVAL;
+ }
+
+ return xcc - 1;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh,
+ .read_wave_data = &gfx_v9_4_3_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
+ .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
+ .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+};
+
+static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
+ adev->gfx.ras = &gfx_v9_4_3_ras;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_BANKS);
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int xcc_id, int mec, int pipe, int queue)
+{
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
+ uint32_t xcc_doorbell_start;
+
+ ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
+ ring_id];
+
+ /* mec0 is me1 */
+ ring->xcc_id = xcc_id;
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range;
+ ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+ (ring_id + xcc_id * adev->gfx.num_compute_rings) *
+ GFX9_MEC_HPD_SIZE;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ sprintf(ring->name, "comp_%d.%d.%d.%d",
+ ring->xcc_id, ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v9_4_3_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id, xcc_id, num_xcc;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v9_4_3_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
+ k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(
+ adev, xcc_id, i, k, j))
+ continue;
+
+ r = gfx_v9_4_3_compute_ring_init(adev,
+ ring_id,
+ xcc_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq[xcc_id];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+ r = amdgpu_gfx_mqd_sw_init(adev,
+ sizeof(struct v9_mqd_allocation), xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_4_3_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_gfx_ras_sw_init(adev);
+}
+
+static int gfx_v9_4_3_sw_fini(void *handle)
{
- soc15_grbm_select(adev, me, pipe, q, vm);
+ int i, num_xcc;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ for (i = 0; i < num_xcc; i++) {
+ amdgpu_gfx_mqd_sw_fini(adev, i);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
+ amdgpu_gfx_kiq_fini(adev, i);
+ }
+
+ gfx_v9_4_3_mec_fini(adev);
+ amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+ gfx_v9_4_3_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ return 0;
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 tmp;
+ int i;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ if (i == 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, tmp);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_BASES, 0);
+ } else {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, tmp);
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >>
+ 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >>
+ 48));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_BASES, tmp);
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
+ gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 =
+ RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+}
+
+static void
+gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1);
+}
+
+static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
+{
+ /*
+ * Rlc save restore list is workable since v2_1.
+ * And it's needed by gfxoff feature.
+ */
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
+}
+
+static void gfx_v9_4_3_xcc_program_xcc_id(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t tmp = 0;
+ int num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (num_xcc) {
+ /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */
+ case 1:
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, 0x8);
+ break;
+ case 2:
+ case 4:
+ case 6:
+ case 8:
+ tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID);
+ tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp);
+
+ break;
+ default:
+ break;
+ }
}
static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
@@ -155,36 +1052,37 @@ static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
uint32_t rlc_setting;
/* if RLC is not enabled, do nothing */
- rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL);
+ rlc_setting = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return false;
return true;
}
-static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
uint32_t data;
unsigned i;
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
- WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
- if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
}
-static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev)
+static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
{
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
- WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
}
static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
@@ -196,7 +1094,8 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev)
+static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev,
+ int xcc_id)
{
u32 i, j, k;
u32 mask;
@@ -204,15 +1103,17 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff);
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff,
+ xcc_id);
for (k = 0; k < adev->usec_timeout; k++) {
- if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0)
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
udelay(1);
}
if (k == adev->usec_timeout) {
- gfx_v9_4_3_select_se_sh(adev, 0xffffffff,
- 0xffffffff, 0xffffffff);
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff,
+ 0xffffffff,
+ 0xffffffff, xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
i, j);
@@ -220,7 +1121,8 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -228,79 +1130,108 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev)
RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
for (k = 0; k < adev->usec_timeout; k++) {
- if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
break;
udelay(1);
}
}
-static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev,
- bool enable)
+static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
{
u32 tmp;
/* These interrupts should be enabled to drive DS clock */
- tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
- if (adev->gfx.num_gfx_rings)
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
- WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp);
+}
+
+static void gfx_v9_4_3_xcc_rlc_stop(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 0);
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+ gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, xcc_id);
}
static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
{
- WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
- gfx_v9_4_3_enable_gui_idle_interrupt(adev, false);
- gfx_v9_4_3_wait_for_rlc_serdes(adev);
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_stop(adev, i);
}
-static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
+static void gfx_v9_4_3_xcc_rlc_reset(struct amdgpu_device *adev, int xcc_id)
{
- WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 1);
udelay(50);
- WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 0);
udelay(50);
}
-static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
+static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
{
-#ifdef AMDGPU_RLC_DEBUG_RETRY
- u32 rlc_ucode_ver;
-#endif
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_reset(adev, i);
+}
- WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+static void gfx_v9_4_3_xcc_rlc_start(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 1);
udelay(50);
/* carrizo do enable cp interrupt after cp inited */
if (!(adev->flags & AMD_IS_APU)) {
- gfx_v9_4_3_enable_gui_idle_interrupt(adev, true);
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
udelay(50);
}
+}
+static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
+{
#ifdef AMDGPU_RLC_DEBUG_RETRY
- /* RLC_GPM_GENERAL_6 : RLC Ucode version */
- rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6);
- if (rlc_ucode_ver == 0x108) {
- dev_info(adev->dev,
- "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
- rlc_ucode_ver, adev->gfx.rlc_fw_version);
- /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
- * default is 0x9C4 to create a 100us interval */
- WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4);
- /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
- * to disable the page fault retry interrupts, default is
- * 0x100 (256) */
- WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100);
- }
+ u32 rlc_ucode_ver;
+#endif
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ gfx_v9_4_3_xcc_rlc_start(adev, i);
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ /* RLC_GPM_GENERAL_6 : RLC Ucode version */
+ rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6);
+ if (rlc_ucode_ver == 0x108) {
+ dev_info(adev->dev,
+ "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ rlc_ucode_ver, adev->gfx.rlc_fw_version);
+ /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
+ * default is 0x9C4 to create a 100us interval */
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_TIMER_INT_3, 0x9C4);
+ /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
+ * to disable the page fault retry interrupts, default is
+ * 0x100 (256) */
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_12, 0x100);
+ }
#endif
+ }
}
-static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev)
+static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
{
const struct rlc_firmware_header_v2_0 *hdr;
const __le32 *fw_data;
@@ -316,49 +1247,65 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev)
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
- WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
RLCG_UCODE_LOADING_START_ADDRESS);
for (i = 0; i < fw_size; i++) {
if (amdgpu_emu_mode == 1 && i % 100 == 0) {
dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
msleep(1);
}
- WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
}
- WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
return 0;
}
-static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
+static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id)
{
int r;
- adev->gfx.rlc.funcs->stop(adev);
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
+ /* legacy rlc firmware loading */
+ r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
+ }
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
/* disable CG */
- WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
+ gfx_v9_4_3_xcc_init_pg(adev, xcc_id);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
- /* TODO: revisit pg function */
- /* gfx_v9_4_3_init_pg(adev);*/
+ return 0;
+}
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- /* legacy rlc firmware loading */
- r = gfx_v9_4_3_rlc_load_microcode(adev);
+static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
+{
+ int r, i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
if (r)
return r;
}
- adev->gfx.rlc.funcs->start(adev);
-
return 0;
}
-static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
+ unsigned vmid)
{
u32 reg, data;
- reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(reg);
else
@@ -368,9 +1315,9 @@ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
else
- WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
}
static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
@@ -382,7 +1329,7 @@ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
uint32_t offset,
struct soc15_reg_rlcg *entries, int arr_size)
{
- int i;
+ int i, inst;
uint32_t reg;
if (!entries)
@@ -392,7 +1339,12 @@ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
const struct soc15_reg_rlcg *entry;
entry = &entries[i];
- reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ inst = adev->ip_map.logical_to_dev_inst ?
+ adev->ip_map.logical_to_dev_inst(
+ adev, entry->hwip, entry->instance) :
+ entry->instance;
+ reg = adev->reg_offset[entry->hwip][inst][entry->segment] +
+ entry->reg;
if (offset == reg)
return true;
}
@@ -407,19 +1359,1025 @@ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offs
ARRAY_SIZE(rlcg_access_gc_9_4_3));
}
-const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
- .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
- .select_se_sh = &gfx_v9_4_3_select_se_sh,
- .read_wave_data = &gfx_v9_4_3_read_wave_data,
- .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
- .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
- .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
-};
+static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ if (enable) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0);
+ } else {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[xcc_id].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v9_4_3_xcc_cp_compute_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i;
+ u32 tmp;
+ u32 mec_ucode_addr_offset;
+ u32 mec_ucode_data_offset;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+ adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ mec_ucode_addr_offset =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_ADDR);
+ mec_ucode_data_offset =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_DATA);
+
+ /* MEC1 */
+ WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset);
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32(mec_ucode_data_offset,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version);
+ /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
+
+ return 0;
+}
+
+/* KIQ functions */
+static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+}
+
+static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ mqd->cp_hqd_queue_priority =
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+ }
+}
+
+static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ mqd->dynamic_cu_mask_addr_lo =
+ lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+ mqd->dynamic_cu_mask_addr_hi =
+ upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a queue/ring */
+ gfx_v9_4_3_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_QUANTUM);
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_LOWER,
+ ((adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_UPPER,
+ ((adev->doorbell_index.userqueue_end +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int j;
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ if (j == AMDGPU_MAX_USEC_TIMEOUT) {
+ DRM_DEBUG("%s dequeue request failed.\n", ring->name);
+
+ /* Manual disable if dequeue request times out */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ struct v9_mqd *tmp_mqd;
+
+ gfx_v9_4_3_xcc_kiq_setting(ring, xcc_id);
+
+ /* GPU could be in bad state during probe, driver trigger the reset
+ * after load the SMU, in this case , the mqd is not be initialized.
+ * driver need to re-init the mqd.
+ * check mqd->cp_hqd_pq_control since this value should not be 0
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup;
+ if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
+ /* for GPU_RESET case , reset MQD to a clean status */
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+ struct v9_mqd *tmp_mqd;
+
+ /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
+ * is not be initialized before
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+
+ if (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int j;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring[j + xcc_id * adev->gfx.num_compute_rings];
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me,
+ ring->pipe,
+ ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq[xcc_id].ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ r = amdgpu_gfx_enable_kcq(adev, xcc_id);
+done:
+ return r;
+}
+
+static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r, j;
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_disable_gpa_mode(adev, xcc_id);
+
+ r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ /* set the virtual and physical id based on partition_mode */
+ gfx_v9_4_3_xcc_program_xcc_id(adev, xcc_id);
+
+ r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_xcc_kcq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring
+ [j + xcc_id * adev->gfx.num_compute_rings];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
+{
+ int r = 0, i, num_xcc;
+
+ if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE) ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
+ amdgpu_user_partt_mode);
+
+ if (r)
+ return r;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable,
+ int xcc_id)
+{
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ if (amdgpu_gfx_disable_kcq(adev, xcc_id))
+ DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id);
+
+ /* Use deinitialize sequence from CAIL when unbinding device
+ * from driver, otherwise KIQ is hanging when binding back
+ */
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, adev->gfx.kiq[xcc_id].ring.me,
+ adev->gfx.kiq[xcc_id].ring.pipe,
+ adev->gfx.kiq[xcc_id].ring.queue, 0,
+ GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring,
+ xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
+ gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
+}
+
+static int gfx_v9_4_3_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfx_v9_4_3_init_golden_registers(adev);
+
+ gfx_v9_4_3_constants_init(adev);
+
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v9_4_3_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, num_xcc;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ gfx_v9_4_3_xcc_fini(adev, i);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_suspend(void *handle)
+{
+ return gfx_v9_4_3_hw_fini(handle);
+}
+
+static int gfx_v9_4_3_resume(void *handle)
+{
+ return gfx_v9_4_3_hw_init(handle);
+}
+
+static bool gfx_v9_4_3_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ }
+ return true;
+}
+
+static int gfx_v9_4_3_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v9_4_3_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v9_4_3_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v9_4_3_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ gfx_v9_4_3_set_kiq_pm4_funcs(adev);
+ gfx_v9_4_3_set_ring_funcs(adev);
+ gfx_v9_4_3_set_irq_funcs(adev);
+ gfx_v9_4_3_set_gds_init(adev);
+ gfx_v9_4_3_set_rlc_funcs(adev);
+
+ return gfx_v9_4_3_init_microcode(adev);
+}
+
+static int gfx_v9_4_3_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL);
+
+ if (enable)
+ data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
+ else
+ data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data);
+}
+
+static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void
+gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t data, def;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+
+}
+
+static void
+gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+
+ data = (0x36
+ << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+ }
+
+}
-const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
+static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+
+ if (enable) {
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === CGCG + CGLS === */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === MGCG + MGLS === */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+}
+
+static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
.is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled,
- .set_safe_mode = gfx_v9_4_3_set_safe_mode,
- .unset_safe_mode = gfx_v9_4_3_unset_safe_mode,
+ .set_safe_mode = gfx_v9_4_3_xcc_set_safe_mode,
+ .unset_safe_mode = gfx_v9_4_3_xcc_unset_safe_mode,
.init = gfx_v9_4_3_rlc_init,
.resume = gfx_v9_4_3_rlc_resume,
.stop = gfx_v9_4_3_rlc_stop,
@@ -428,3 +2386,1982 @@ const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
.update_spm_vmid = gfx_v9_4_3_update_spm_vmid,
.is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
};
+
+static int gfx_v9_4_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int gfx_v9_4_3_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_update_gfx_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, i);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+}
+
+static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+ EOP_TC_NC_ACTION_EN) :
+ (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN)) |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ seq, 0xffffffff, 4);
+}
+
+static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+}
+
+static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx9 now */
+ }
+}
+
+static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ struct amdgpu_device *adev, int me, int pipe,
+ enum amdgpu_interrupt_state state, int xcc_id)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ switch (type) {
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 0, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 1, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 2, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 3, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 0, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 1, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 2, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 3, state, i);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i, xcc_id;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+ if (xcc_id == -EINVAL)
+ return -EINVAL;
+
+ switch (me_id) {
+ case 0:
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
+ [i +
+ xcc_id * adev->gfx.num_compute_rings];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i, xcc_id;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+ if (xcc_id == -EINVAL)
+ return;
+
+ switch (me_id) {
+ case 0:
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
+ [i +
+ xcc_id * adev->gfx.num_compute_rings];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
+static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring,
+ uint32_t pipe, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ uint32_t wcl_cs_reg;
+
+ /* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
+ val = enable ? 0x1 : 0x7f;
+
+ switch (pipe) {
+ case 0:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS0);
+ break;
+ case 1:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS1);
+ break;
+ case 2:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS2);
+ break;
+ case 3:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS3);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+
+ amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
+
+}
+static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ int i;
+
+ /* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : 0x07ffffff;
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_GFX),
+ val);
+
+ /* Restrict waves for normal/low priority compute queues as well
+ * to get best QoS for high priority compute jobs.
+ *
+ * amdgpu controls only 1st ME(0-3 CS pipes).
+ */
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ if (i != ring->pipe)
+ gfx_v9_4_3_emit_wave_limit_cs(ring, i, enable);
+
+ }
+}
+
+enum amdgpu_gfx_cp_ras_mem_id {
+ AMDGPU_GFX_CP_MEM1 = 1,
+ AMDGPU_GFX_CP_MEM2,
+ AMDGPU_GFX_CP_MEM3,
+ AMDGPU_GFX_CP_MEM4,
+ AMDGPU_GFX_CP_MEM5,
+};
+
+enum amdgpu_gfx_gcea_ras_mem_id {
+ AMDGPU_GFX_GCEA_IOWR_CMDMEM = 4,
+ AMDGPU_GFX_GCEA_IORD_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIWR_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIRD_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMWR_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMRD_CMDMEM,
+ AMDGPU_GFX_GCEA_MAM_DMEM0,
+ AMDGPU_GFX_GCEA_MAM_DMEM1,
+ AMDGPU_GFX_GCEA_MAM_DMEM2,
+ AMDGPU_GFX_GCEA_MAM_DMEM3,
+ AMDGPU_GFX_GCEA_MAM_AMEM0,
+ AMDGPU_GFX_GCEA_MAM_AMEM1,
+ AMDGPU_GFX_GCEA_MAM_AMEM2,
+ AMDGPU_GFX_GCEA_MAM_AMEM3,
+ AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER,
+ AMDGPU_GFX_GCEA_WRET_TAGMEM,
+ AMDGPU_GFX_GCEA_RRET_TAGMEM,
+ AMDGPU_GFX_GCEA_IOWR_DATAMEM,
+ AMDGPU_GFX_GCEA_GMIWR_DATAMEM,
+ AMDGPU_GFX_GCEA_DRAM_DATAMEM,
+};
+
+enum amdgpu_gfx_gc_cane_ras_mem_id {
+ AMDGPU_GFX_GC_CANE_MEM0 = 0,
+};
+
+enum amdgpu_gfx_gcutcl2_ras_mem_id {
+ AMDGPU_GFX_GCUTCL2_MEM2P512X95 = 160,
+};
+
+enum amdgpu_gfx_gds_ras_mem_id {
+ AMDGPU_GFX_GDS_MEM0 = 0,
+};
+
+enum amdgpu_gfx_lds_ras_mem_id {
+ AMDGPU_GFX_LDS_BANK0 = 0,
+ AMDGPU_GFX_LDS_BANK1,
+ AMDGPU_GFX_LDS_BANK2,
+ AMDGPU_GFX_LDS_BANK3,
+ AMDGPU_GFX_LDS_BANK4,
+ AMDGPU_GFX_LDS_BANK5,
+ AMDGPU_GFX_LDS_BANK6,
+ AMDGPU_GFX_LDS_BANK7,
+ AMDGPU_GFX_LDS_BANK8,
+ AMDGPU_GFX_LDS_BANK9,
+ AMDGPU_GFX_LDS_BANK10,
+ AMDGPU_GFX_LDS_BANK11,
+ AMDGPU_GFX_LDS_BANK12,
+ AMDGPU_GFX_LDS_BANK13,
+ AMDGPU_GFX_LDS_BANK14,
+ AMDGPU_GFX_LDS_BANK15,
+ AMDGPU_GFX_LDS_BANK16,
+ AMDGPU_GFX_LDS_BANK17,
+ AMDGPU_GFX_LDS_BANK18,
+ AMDGPU_GFX_LDS_BANK19,
+ AMDGPU_GFX_LDS_BANK20,
+ AMDGPU_GFX_LDS_BANK21,
+ AMDGPU_GFX_LDS_BANK22,
+ AMDGPU_GFX_LDS_BANK23,
+ AMDGPU_GFX_LDS_BANK24,
+ AMDGPU_GFX_LDS_BANK25,
+ AMDGPU_GFX_LDS_BANK26,
+ AMDGPU_GFX_LDS_BANK27,
+ AMDGPU_GFX_LDS_BANK28,
+ AMDGPU_GFX_LDS_BANK29,
+ AMDGPU_GFX_LDS_BANK30,
+ AMDGPU_GFX_LDS_BANK31,
+ AMDGPU_GFX_LDS_SP_BUFFER_A,
+ AMDGPU_GFX_LDS_SP_BUFFER_B,
+};
+
+enum amdgpu_gfx_rlc_ras_mem_id {
+ AMDGPU_GFX_RLC_GPMF32 = 1,
+ AMDGPU_GFX_RLC_RLCVF32,
+ AMDGPU_GFX_RLC_SCRATCH,
+ AMDGPU_GFX_RLC_SRM_ARAM,
+ AMDGPU_GFX_RLC_SRM_DRAM,
+ AMDGPU_GFX_RLC_TCTAG,
+ AMDGPU_GFX_RLC_SPM_SE,
+ AMDGPU_GFX_RLC_SPM_GRBMT,
+};
+
+enum amdgpu_gfx_sp_ras_mem_id {
+ AMDGPU_GFX_SP_SIMDID0 = 0,
+};
+
+enum amdgpu_gfx_spi_ras_mem_id {
+ AMDGPU_GFX_SPI_MEM0 = 0,
+ AMDGPU_GFX_SPI_MEM1,
+ AMDGPU_GFX_SPI_MEM2,
+ AMDGPU_GFX_SPI_MEM3,
+};
+
+enum amdgpu_gfx_sqc_ras_mem_id {
+ AMDGPU_GFX_SQC_INST_CACHE_A = 100,
+ AMDGPU_GFX_SQC_INST_CACHE_B = 101,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_A = 102,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_B = 103,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A = 104,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B = 105,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A = 106,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B = 107,
+ AMDGPU_GFX_SQC_DATA_CACHE_A = 200,
+ AMDGPU_GFX_SQC_DATA_CACHE_B = 201,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_A = 202,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_B = 203,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A = 204,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B = 205,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A = 206,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B = 207,
+ AMDGPU_GFX_SQC_DIRTY_BIT_A = 208,
+ AMDGPU_GFX_SQC_DIRTY_BIT_B = 209,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0 = 210,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1 = 211,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A = 212,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B = 213,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE = 108,
+};
+
+enum amdgpu_gfx_sq_ras_mem_id {
+ AMDGPU_GFX_SQ_SGPR_MEM0 = 0,
+ AMDGPU_GFX_SQ_SGPR_MEM1,
+ AMDGPU_GFX_SQ_SGPR_MEM2,
+ AMDGPU_GFX_SQ_SGPR_MEM3,
+};
+
+enum amdgpu_gfx_ta_ras_mem_id {
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_LO = 1,
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_HI,
+ AMDGPU_GFX_TA_FS_CFIFO_RAM,
+ AMDGPU_GFX_TA_FSX_LFIFO,
+ AMDGPU_GFX_TA_FS_DFIFO_RAM,
+};
+
+enum amdgpu_gfx_tcc_ras_mem_id {
+ AMDGPU_GFX_TCC_MEM1 = 1,
+};
+
+enum amdgpu_gfx_tca_ras_mem_id {
+ AMDGPU_GFX_TCA_MEM1 = 1,
+};
+
+enum amdgpu_gfx_tci_ras_mem_id {
+ AMDGPU_GFX_TCIW_MEM = 1,
+};
+
+enum amdgpu_gfx_tcp_ras_mem_id {
+ AMDGPU_GFX_TCP_LFIFO0 = 1,
+ AMDGPU_GFX_TCP_SET0BANK0_RAM,
+ AMDGPU_GFX_TCP_SET0BANK1_RAM,
+ AMDGPU_GFX_TCP_SET0BANK2_RAM,
+ AMDGPU_GFX_TCP_SET0BANK3_RAM,
+ AMDGPU_GFX_TCP_SET1BANK0_RAM,
+ AMDGPU_GFX_TCP_SET1BANK1_RAM,
+ AMDGPU_GFX_TCP_SET1BANK2_RAM,
+ AMDGPU_GFX_TCP_SET1BANK3_RAM,
+ AMDGPU_GFX_TCP_SET2BANK0_RAM,
+ AMDGPU_GFX_TCP_SET2BANK1_RAM,
+ AMDGPU_GFX_TCP_SET2BANK2_RAM,
+ AMDGPU_GFX_TCP_SET2BANK3_RAM,
+ AMDGPU_GFX_TCP_SET3BANK0_RAM,
+ AMDGPU_GFX_TCP_SET3BANK1_RAM,
+ AMDGPU_GFX_TCP_SET3BANK2_RAM,
+ AMDGPU_GFX_TCP_SET3BANK3_RAM,
+ AMDGPU_GFX_TCP_VM_FIFO,
+ AMDGPU_GFX_TCP_DB_TAGRAM0,
+ AMDGPU_GFX_TCP_DB_TAGRAM1,
+ AMDGPU_GFX_TCP_DB_TAGRAM2,
+ AMDGPU_GFX_TCP_DB_TAGRAM3,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1,
+ AMDGPU_GFX_TCP_CMD_FIFO,
+};
+
+enum amdgpu_gfx_td_ras_mem_id {
+ AMDGPU_GFX_TD_UTD_CS_FIFO_MEM = 1,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM,
+};
+
+enum amdgpu_gfx_tcx_ras_mem_id {
+ AMDGPU_GFX_TCX_FIFOD0 = 0,
+ AMDGPU_GFX_TCX_FIFOD1,
+ AMDGPU_GFX_TCX_FIFOD2,
+ AMDGPU_GFX_TCX_FIFOD3,
+ AMDGPU_GFX_TCX_FIFOD4,
+ AMDGPU_GFX_TCX_FIFOD5,
+ AMDGPU_GFX_TCX_FIFOD6,
+ AMDGPU_GFX_TCX_FIFOD7,
+ AMDGPU_GFX_TCX_FIFOB0,
+ AMDGPU_GFX_TCX_FIFOB1,
+ AMDGPU_GFX_TCX_FIFOB2,
+ AMDGPU_GFX_TCX_FIFOB3,
+ AMDGPU_GFX_TCX_FIFOB4,
+ AMDGPU_GFX_TCX_FIFOB5,
+ AMDGPU_GFX_TCX_FIFOB6,
+ AMDGPU_GFX_TCX_FIFOB7,
+ AMDGPU_GFX_TCX_FIFOA0,
+ AMDGPU_GFX_TCX_FIFOA1,
+ AMDGPU_GFX_TCX_FIFOA2,
+ AMDGPU_GFX_TCX_FIFOA3,
+ AMDGPU_GFX_TCX_FIFOA4,
+ AMDGPU_GFX_TCX_FIFOA5,
+ AMDGPU_GFX_TCX_FIFOA6,
+ AMDGPU_GFX_TCX_FIFOA7,
+ AMDGPU_GFX_TCX_CFIFO0,
+ AMDGPU_GFX_TCX_CFIFO1,
+ AMDGPU_GFX_TCX_CFIFO2,
+ AMDGPU_GFX_TCX_CFIFO3,
+ AMDGPU_GFX_TCX_CFIFO4,
+ AMDGPU_GFX_TCX_CFIFO5,
+ AMDGPU_GFX_TCX_CFIFO6,
+ AMDGPU_GFX_TCX_CFIFO7,
+ AMDGPU_GFX_TCX_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_FIFO_ACKD7,
+ AMDGPU_GFX_TCX_DST_FIFOA0,
+ AMDGPU_GFX_TCX_DST_FIFOA1,
+ AMDGPU_GFX_TCX_DST_FIFOA2,
+ AMDGPU_GFX_TCX_DST_FIFOA3,
+ AMDGPU_GFX_TCX_DST_FIFOA4,
+ AMDGPU_GFX_TCX_DST_FIFOA5,
+ AMDGPU_GFX_TCX_DST_FIFOA6,
+ AMDGPU_GFX_TCX_DST_FIFOA7,
+ AMDGPU_GFX_TCX_DST_FIFOB0,
+ AMDGPU_GFX_TCX_DST_FIFOB1,
+ AMDGPU_GFX_TCX_DST_FIFOB2,
+ AMDGPU_GFX_TCX_DST_FIFOB3,
+ AMDGPU_GFX_TCX_DST_FIFOB4,
+ AMDGPU_GFX_TCX_DST_FIFOB5,
+ AMDGPU_GFX_TCX_DST_FIFOB6,
+ AMDGPU_GFX_TCX_DST_FIFOB7,
+ AMDGPU_GFX_TCX_DST_FIFOD0,
+ AMDGPU_GFX_TCX_DST_FIFOD1,
+ AMDGPU_GFX_TCX_DST_FIFOD2,
+ AMDGPU_GFX_TCX_DST_FIFOD3,
+ AMDGPU_GFX_TCX_DST_FIFOD4,
+ AMDGPU_GFX_TCX_DST_FIFOD5,
+ AMDGPU_GFX_TCX_DST_FIFOD6,
+ AMDGPU_GFX_TCX_DST_FIFOD7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD7,
+};
+
+enum amdgpu_gfx_atc_l2_ras_mem_id {
+ AMDGPU_GFX_ATC_L2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_utcl2_ras_mem_id {
+ AMDGPU_GFX_UTCL2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_vml2_ras_mem_id {
+ AMDGPU_GFX_VML2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_vml2_walker_ras_mem_id {
+ AMDGPU_GFX_VML2_WALKER_MEM0 = 0,
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_cp_mem_list[] = {
+ {AMDGPU_GFX_CP_MEM1, "CP_MEM1"},
+ {AMDGPU_GFX_CP_MEM2, "CP_MEM2"},
+ {AMDGPU_GFX_CP_MEM3, "CP_MEM3"},
+ {AMDGPU_GFX_CP_MEM4, "CP_MEM4"},
+ {AMDGPU_GFX_CP_MEM5, "CP_MEM5"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcea_mem_list[] = {
+ {AMDGPU_GFX_GCEA_IOWR_CMDMEM, "GCEA_IOWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_IORD_CMDMEM, "GCEA_IORD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_CMDMEM, "GCEA_GMIWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIRD_CMDMEM, "GCEA_GMIRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, "GCEA_DRAMWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, "GCEA_DRAMRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM0, "GCEA_MAM_DMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM1, "GCEA_MAM_DMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM2, "GCEA_MAM_DMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM3, "GCEA_MAM_DMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM0, "GCEA_MAM_AMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM1, "GCEA_MAM_AMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM2, "GCEA_MAM_AMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM3, "GCEA_MAM_AMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, "GCEA_MAM_AFLUSH_BUFFER"},
+ {AMDGPU_GFX_GCEA_WRET_TAGMEM, "GCEA_WRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_RRET_TAGMEM, "GCEA_RRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_IOWR_DATAMEM, "GCEA_IOWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_DATAMEM, "GCEA_GMIWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_DRAM_DATAMEM, "GCEA_DRAM_DATAMEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gc_cane_mem_list[] = {
+ {AMDGPU_GFX_GC_CANE_MEM0, "GC_CANE_MEM0"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcutcl2_mem_list[] = {
+ {AMDGPU_GFX_GCUTCL2_MEM2P512X95, "GCUTCL2_MEM2P512X95"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gds_mem_list[] = {
+ {AMDGPU_GFX_GDS_MEM0, "GDS_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_lds_mem_list[] = {
+ {AMDGPU_GFX_LDS_BANK0, "LDS_BANK0"},
+ {AMDGPU_GFX_LDS_BANK1, "LDS_BANK1"},
+ {AMDGPU_GFX_LDS_BANK2, "LDS_BANK2"},
+ {AMDGPU_GFX_LDS_BANK3, "LDS_BANK3"},
+ {AMDGPU_GFX_LDS_BANK4, "LDS_BANK4"},
+ {AMDGPU_GFX_LDS_BANK5, "LDS_BANK5"},
+ {AMDGPU_GFX_LDS_BANK6, "LDS_BANK6"},
+ {AMDGPU_GFX_LDS_BANK7, "LDS_BANK7"},
+ {AMDGPU_GFX_LDS_BANK8, "LDS_BANK8"},
+ {AMDGPU_GFX_LDS_BANK9, "LDS_BANK9"},
+ {AMDGPU_GFX_LDS_BANK10, "LDS_BANK10"},
+ {AMDGPU_GFX_LDS_BANK11, "LDS_BANK11"},
+ {AMDGPU_GFX_LDS_BANK12, "LDS_BANK12"},
+ {AMDGPU_GFX_LDS_BANK13, "LDS_BANK13"},
+ {AMDGPU_GFX_LDS_BANK14, "LDS_BANK14"},
+ {AMDGPU_GFX_LDS_BANK15, "LDS_BANK15"},
+ {AMDGPU_GFX_LDS_BANK16, "LDS_BANK16"},
+ {AMDGPU_GFX_LDS_BANK17, "LDS_BANK17"},
+ {AMDGPU_GFX_LDS_BANK18, "LDS_BANK18"},
+ {AMDGPU_GFX_LDS_BANK19, "LDS_BANK19"},
+ {AMDGPU_GFX_LDS_BANK20, "LDS_BANK20"},
+ {AMDGPU_GFX_LDS_BANK21, "LDS_BANK21"},
+ {AMDGPU_GFX_LDS_BANK22, "LDS_BANK22"},
+ {AMDGPU_GFX_LDS_BANK23, "LDS_BANK23"},
+ {AMDGPU_GFX_LDS_BANK24, "LDS_BANK24"},
+ {AMDGPU_GFX_LDS_BANK25, "LDS_BANK25"},
+ {AMDGPU_GFX_LDS_BANK26, "LDS_BANK26"},
+ {AMDGPU_GFX_LDS_BANK27, "LDS_BANK27"},
+ {AMDGPU_GFX_LDS_BANK28, "LDS_BANK28"},
+ {AMDGPU_GFX_LDS_BANK29, "LDS_BANK29"},
+ {AMDGPU_GFX_LDS_BANK30, "LDS_BANK30"},
+ {AMDGPU_GFX_LDS_BANK31, "LDS_BANK31"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_A, "LDS_SP_BUFFER_A"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_B, "LDS_SP_BUFFER_B"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_rlc_mem_list[] = {
+ {AMDGPU_GFX_RLC_GPMF32, "RLC_GPMF32"},
+ {AMDGPU_GFX_RLC_RLCVF32, "RLC_RLCVF32"},
+ {AMDGPU_GFX_RLC_SCRATCH, "RLC_SCRATCH"},
+ {AMDGPU_GFX_RLC_SRM_ARAM, "RLC_SRM_ARAM"},
+ {AMDGPU_GFX_RLC_SRM_DRAM, "RLC_SRM_DRAM"},
+ {AMDGPU_GFX_RLC_TCTAG, "RLC_TCTAG"},
+ {AMDGPU_GFX_RLC_SPM_SE, "RLC_SPM_SE"},
+ {AMDGPU_GFX_RLC_SPM_GRBMT, "RLC_SPM_GRBMT"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sp_mem_list[] = {
+ {AMDGPU_GFX_SP_SIMDID0, "SP_SIMDID0"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_spi_mem_list[] = {
+ {AMDGPU_GFX_SPI_MEM0, "SPI_MEM0"},
+ {AMDGPU_GFX_SPI_MEM1, "SPI_MEM1"},
+ {AMDGPU_GFX_SPI_MEM2, "SPI_MEM2"},
+ {AMDGPU_GFX_SPI_MEM3, "SPI_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sqc_mem_list[] = {
+ {AMDGPU_GFX_SQC_INST_CACHE_A, "SQC_INST_CACHE_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_B, "SQC_INST_CACHE_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_A, "SQC_INST_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_B, "SQC_INST_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A, "SQC_INST_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B, "SQC_INST_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A, "SQC_INST_CACHE_GATCL1_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B, "SQC_INST_CACHE_GATCL1_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_A, "SQC_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_B, "SQC_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_A, "SQC_DATA_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_B, "SQC_DATA_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A, "SQC_DATA_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B, "SQC_DATA_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A, "SQC_DATA_CACHE_HIT_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B, "SQC_DATA_CACHE_HIT_FIFO_B"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_A, "SQC_DIRTY_BIT_A"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_B, "SQC_DIRTY_BIT_B"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0, "SQC_WRITE_DATA_BUFFER_CU0"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1, "SQC_WRITE_DATA_BUFFER_CU1"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE, "SQC_UTCL1_MISS_LFIFO_INST_CACHE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sq_mem_list[] = {
+ {AMDGPU_GFX_SQ_SGPR_MEM0, "SQ_SGPR_MEM0"},
+ {AMDGPU_GFX_SQ_SGPR_MEM1, "SQ_SGPR_MEM1"},
+ {AMDGPU_GFX_SQ_SGPR_MEM2, "SQ_SGPR_MEM2"},
+ {AMDGPU_GFX_SQ_SGPR_MEM3, "SQ_SGPR_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_ta_mem_list[] = {
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_LO, "TA_FS_AFIFO_RAM_LO"},
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, "TA_FS_AFIFO_RAM_HI"},
+ {AMDGPU_GFX_TA_FS_CFIFO_RAM, "TA_FS_CFIFO_RAM"},
+ {AMDGPU_GFX_TA_FSX_LFIFO, "TA_FSX_LFIFO"},
+ {AMDGPU_GFX_TA_FS_DFIFO_RAM, "TA_FS_DFIFO_RAM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcc_mem_list[] = {
+ {AMDGPU_GFX_TCC_MEM1, "TCC_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tca_mem_list[] = {
+ {AMDGPU_GFX_TCA_MEM1, "TCA_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tci_mem_list[] = {
+ {AMDGPU_GFX_TCIW_MEM, "TCIW_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcp_mem_list[] = {
+ {AMDGPU_GFX_TCP_LFIFO0, "TCP_LFIFO0"},
+ {AMDGPU_GFX_TCP_SET0BANK0_RAM, "TCP_SET0BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK1_RAM, "TCP_SET0BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK2_RAM, "TCP_SET0BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK3_RAM, "TCP_SET0BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK0_RAM, "TCP_SET1BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK1_RAM, "TCP_SET1BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK2_RAM, "TCP_SET1BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK3_RAM, "TCP_SET1BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK0_RAM, "TCP_SET2BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK1_RAM, "TCP_SET2BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK2_RAM, "TCP_SET2BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK3_RAM, "TCP_SET2BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK0_RAM, "TCP_SET3BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK1_RAM, "TCP_SET3BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK2_RAM, "TCP_SET3BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK3_RAM, "TCP_SET3BANK3_RAM"},
+ {AMDGPU_GFX_TCP_VM_FIFO, "TCP_VM_FIFO"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM0, "TCP_DB_TAGRAM0"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM1, "TCP_DB_TAGRAM1"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM2, "TCP_DB_TAGRAM2"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM3, "TCP_DB_TAGRAM3"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, "TCP_UTCL1_LFIFO_PROBE0"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, "TCP_UTCL1_LFIFO_PROBE1"},
+ {AMDGPU_GFX_TCP_CMD_FIFO, "TCP_CMD_FIFO"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_td_mem_list[] = {
+ {AMDGPU_GFX_TD_UTD_CS_FIFO_MEM, "TD_UTD_CS_FIFO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, "TD_UTD_SS_FIFO_LO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, "TD_UTD_SS_FIFO_HI_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcx_mem_list[] = {
+ {AMDGPU_GFX_TCX_FIFOD0, "TCX_FIFOD0"},
+ {AMDGPU_GFX_TCX_FIFOD1, "TCX_FIFOD1"},
+ {AMDGPU_GFX_TCX_FIFOD2, "TCX_FIFOD2"},
+ {AMDGPU_GFX_TCX_FIFOD3, "TCX_FIFOD3"},
+ {AMDGPU_GFX_TCX_FIFOD4, "TCX_FIFOD4"},
+ {AMDGPU_GFX_TCX_FIFOD5, "TCX_FIFOD5"},
+ {AMDGPU_GFX_TCX_FIFOD6, "TCX_FIFOD6"},
+ {AMDGPU_GFX_TCX_FIFOD7, "TCX_FIFOD7"},
+ {AMDGPU_GFX_TCX_FIFOB0, "TCX_FIFOB0"},
+ {AMDGPU_GFX_TCX_FIFOB1, "TCX_FIFOB1"},
+ {AMDGPU_GFX_TCX_FIFOB2, "TCX_FIFOB2"},
+ {AMDGPU_GFX_TCX_FIFOB3, "TCX_FIFOB3"},
+ {AMDGPU_GFX_TCX_FIFOB4, "TCX_FIFOB4"},
+ {AMDGPU_GFX_TCX_FIFOB5, "TCX_FIFOB5"},
+ {AMDGPU_GFX_TCX_FIFOB6, "TCX_FIFOB6"},
+ {AMDGPU_GFX_TCX_FIFOB7, "TCX_FIFOB7"},
+ {AMDGPU_GFX_TCX_FIFOA0, "TCX_FIFOA0"},
+ {AMDGPU_GFX_TCX_FIFOA1, "TCX_FIFOA1"},
+ {AMDGPU_GFX_TCX_FIFOA2, "TCX_FIFOA2"},
+ {AMDGPU_GFX_TCX_FIFOA3, "TCX_FIFOA3"},
+ {AMDGPU_GFX_TCX_FIFOA4, "TCX_FIFOA4"},
+ {AMDGPU_GFX_TCX_FIFOA5, "TCX_FIFOA5"},
+ {AMDGPU_GFX_TCX_FIFOA6, "TCX_FIFOA6"},
+ {AMDGPU_GFX_TCX_FIFOA7, "TCX_FIFOA7"},
+ {AMDGPU_GFX_TCX_CFIFO0, "TCX_CFIFO0"},
+ {AMDGPU_GFX_TCX_CFIFO1, "TCX_CFIFO1"},
+ {AMDGPU_GFX_TCX_CFIFO2, "TCX_CFIFO2"},
+ {AMDGPU_GFX_TCX_CFIFO3, "TCX_CFIFO3"},
+ {AMDGPU_GFX_TCX_CFIFO4, "TCX_CFIFO4"},
+ {AMDGPU_GFX_TCX_CFIFO5, "TCX_CFIFO5"},
+ {AMDGPU_GFX_TCX_CFIFO6, "TCX_CFIFO6"},
+ {AMDGPU_GFX_TCX_CFIFO7, "TCX_CFIFO7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB0, "TCX_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB1, "TCX_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB2, "TCX_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB3, "TCX_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB4, "TCX_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB5, "TCX_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB6, "TCX_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB7, "TCX_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD0, "TCX_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD1, "TCX_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD2, "TCX_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD3, "TCX_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD4, "TCX_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD5, "TCX_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD6, "TCX_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD7, "TCX_FIFO_ACKD7"},
+ {AMDGPU_GFX_TCX_DST_FIFOA0, "TCX_DST_FIFOA0"},
+ {AMDGPU_GFX_TCX_DST_FIFOA1, "TCX_DST_FIFOA1"},
+ {AMDGPU_GFX_TCX_DST_FIFOA2, "TCX_DST_FIFOA2"},
+ {AMDGPU_GFX_TCX_DST_FIFOA3, "TCX_DST_FIFOA3"},
+ {AMDGPU_GFX_TCX_DST_FIFOA4, "TCX_DST_FIFOA4"},
+ {AMDGPU_GFX_TCX_DST_FIFOA5, "TCX_DST_FIFOA5"},
+ {AMDGPU_GFX_TCX_DST_FIFOA6, "TCX_DST_FIFOA6"},
+ {AMDGPU_GFX_TCX_DST_FIFOA7, "TCX_DST_FIFOA7"},
+ {AMDGPU_GFX_TCX_DST_FIFOB0, "TCX_DST_FIFOB0"},
+ {AMDGPU_GFX_TCX_DST_FIFOB1, "TCX_DST_FIFOB1"},
+ {AMDGPU_GFX_TCX_DST_FIFOB2, "TCX_DST_FIFOB2"},
+ {AMDGPU_GFX_TCX_DST_FIFOB3, "TCX_DST_FIFOB3"},
+ {AMDGPU_GFX_TCX_DST_FIFOB4, "TCX_DST_FIFOB4"},
+ {AMDGPU_GFX_TCX_DST_FIFOB5, "TCX_DST_FIFOB5"},
+ {AMDGPU_GFX_TCX_DST_FIFOB6, "TCX_DST_FIFOB6"},
+ {AMDGPU_GFX_TCX_DST_FIFOB7, "TCX_DST_FIFOB7"},
+ {AMDGPU_GFX_TCX_DST_FIFOD0, "TCX_DST_FIFOD0"},
+ {AMDGPU_GFX_TCX_DST_FIFOD1, "TCX_DST_FIFOD1"},
+ {AMDGPU_GFX_TCX_DST_FIFOD2, "TCX_DST_FIFOD2"},
+ {AMDGPU_GFX_TCX_DST_FIFOD3, "TCX_DST_FIFOD3"},
+ {AMDGPU_GFX_TCX_DST_FIFOD4, "TCX_DST_FIFOD4"},
+ {AMDGPU_GFX_TCX_DST_FIFOD5, "TCX_DST_FIFOD5"},
+ {AMDGPU_GFX_TCX_DST_FIFOD6, "TCX_DST_FIFOD6"},
+ {AMDGPU_GFX_TCX_DST_FIFOD7, "TCX_DST_FIFOD7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB0, "TCX_DST_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB1, "TCX_DST_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB2, "TCX_DST_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB3, "TCX_DST_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB4, "TCX_DST_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB5, "TCX_DST_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB6, "TCX_DST_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB7, "TCX_DST_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD0, "TCX_DST_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD1, "TCX_DST_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD2, "TCX_DST_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD3, "TCX_DST_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD4, "TCX_DST_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD5, "TCX_DST_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD6, "TCX_DST_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD7, "TCX_DST_FIFO_ACKD7"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_atc_l2_mem_list[] = {
+ {AMDGPU_GFX_ATC_L2_MEM, "ATC_L2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_utcl2_mem_list[] = {
+ {AMDGPU_GFX_UTCL2_MEM, "UTCL2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_mem_list[] = {
+ {AMDGPU_GFX_VML2_MEM, "VML2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_walker_mem_list[] = {
+ {AMDGPU_GFX_VML2_WALKER_MEM, "VML2_WALKER_MEM"},
+};
+
+static const struct amdgpu_gfx_ras_mem_id_entry gfx_v9_4_3_ras_mem_list_array[AMDGPU_GFX_MEM_TYPE_NUM] = {
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_cp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcea_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gc_cane_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcutcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_lds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_rlc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_spi_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sqc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sq_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_ta_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tca_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tci_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_td_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcx_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_atc_l2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_utcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_walker_mem_list)
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = {
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_CE_ERR_STATUS_LOW, regRLC_CE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_CE_ERR_STATUS_LO, regCPC_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_CE_ERR_STATUS_LO, regCPF_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_CE_ERR_STATUS_LO, regCPG_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_CE_ERR_STATUS_LO, regGDS_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_CE_ERR_STATUS_LO, regGC_CANE_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_CE_ERR_STATUS_LO, regTCC_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG),
+ 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 1},
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_UE_ERR_STATUS_LOW, regRLC_UE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_UE_ERR_STATUS_LO, regCPC_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_UE_ERR_STATUS_LO, regCPF_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_UE_ERR_STATUS_LO, regCPG_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_UE_ERR_STATUS_LO, regGDS_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_UE_ERR_STATUS_LO, regGC_CANE_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_UE_ERR_STATUS_LO, regTCC_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG),
+ 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"},
+ AMDGPU_GFX_TCA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_UE_ERR_STATUS_LO, regGCEA_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 1},
+};
+
+static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+};
+
+static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ unsigned long ce_count = 0, ue_count = 0;
+ uint32_t i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &ce_count);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+ }
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ /* the caller should make sure initialize value of
+ * err_data->ue_count and err_data->ce_count
+ */
+ err_data->ce_count += ce_count;
+ err_data->ue_count += ue_count;
+}
+
+static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ uint32_t i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+ }
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t i, j;
+ uint32_t reg_value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
+ reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regGCEA_ERR_STATUS);
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "GCEA err detected at instance: %d, status: 0x%x!\n",
+ j, reg_value);
+ }
+ /* clear after read */
+ reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS,
+ reg_value);
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regVML2_WALKER_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS,
+ 0x3);
+ }
+}
+
+static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev,
+ uint32_t status, int xcc_id)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ uint32_t i, simd, wave;
+ uint32_t wave_status;
+ uint32_t wave_pc_lo, wave_pc_hi;
+ uint32_t wave_exec_lo, wave_exec_hi;
+ uint32_t wave_inst_dw0, wave_inst_dw1;
+ uint32_t wave_ib_sts;
+
+ for (i = 0; i < 32; i++) {
+ if (!((i << 1) & status))
+ continue;
+
+ simd = i / cu_info->max_waves_per_simd;
+ wave = i % cu_info->max_waves_per_simd;
+
+ wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+ wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+ wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+ wave_exec_lo =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+ wave_exec_hi =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+ wave_inst_dw0 =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+ wave_inst_dw1 =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+ wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+
+ dev_info(
+ adev->dev,
+ "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
+ simd, wave, wave_status,
+ ((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
+ ((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
+ ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
+ wave_ib_sts);
+ }
+}
+
+static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+ uint32_t status;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
+ for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx, xcc_id);
+ status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSQ_TIMEOUT_STATUS);
+ if (status != 0) {
+ dev_info(
+ adev->dev,
+ "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
+ se_idx, sh_idx, cu_idx);
+ gfx_v9_4_3_log_cu_timeout_status(
+ adev, status, xcc_id);
+ }
+ /* clear old status */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+}
+
+static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t i, j;
+ uint32_t value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
+ value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS);
+ value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value);
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
+ for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx, xcc_id);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_gfx_ras_error_func(adev, ras_error_status,
+ gfx_v9_4_3_inst_query_ras_err_count);
+}
+
+static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
+}
+
+static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
+}
+
+static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
+}
+
+static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
+ .name = "gfx_v9_4_3",
+ .early_init = gfx_v9_4_3_early_init,
+ .late_init = gfx_v9_4_3_late_init,
+ .sw_init = gfx_v9_4_3_sw_init,
+ .sw_fini = gfx_v9_4_3_sw_fini,
+ .hw_init = gfx_v9_4_3_hw_init,
+ .hw_fini = gfx_v9_4_3_hw_fini,
+ .suspend = gfx_v9_4_3_suspend,
+ .resume = gfx_v9_4_3_resume,
+ .is_idle = gfx_v9_4_3_is_idle,
+ .wait_for_idle = gfx_v9_4_3_wait_for_idle,
+ .soft_reset = gfx_v9_4_3_soft_reset,
+ .set_clockgating_state = gfx_v9_4_3_set_clockgating_state,
+ .set_powergating_state = gfx_v9_4_3_set_powergating_state,
+ .get_clockgating_state = gfx_v9_4_3_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */
+ 7 + /* gfx_v9_4_3_emit_mem_sync */
+ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */
+ 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_4_3_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .test_ib = gfx_v9_4_3_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v9_4_3_emit_mem_sync,
+ .emit_wave_limit = gfx_v9_4_3_emit_wave_limit,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v9_4_3_ring_emit_rreg,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+};
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++)
+ adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs
+ = &gfx_v9_4_3_ring_funcs_compute;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = {
+ .set = gfx_v9_4_3_set_eop_interrupt_state,
+ .process = gfx_v9_4_3_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = {
+ .set = gfx_v9_4_3_set_priv_reg_fault_state,
+ .process = gfx_v9_4_3_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = {
+ .set = gfx_v9_4_3_set_priv_inst_fault_state,
+ .process = gfx_v9_4_3_priv_inst_irq,
+};
+
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs;
+}
+
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs;
+}
+
+
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asci gds info */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ /* 9.4.3 removed all the GDS internal memory,
+ * only support GWS opcode in kernel, like barrier
+ * semaphore.etc */
+ adev->gds.gds_size = 0;
+ break;
+ default:
+ adev->gds.gds_size = 0x10000;
+ break;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ /* deprecated for 9.4.3, no usage at all */
+ adev->gds.gds_compute_max_wave_id = 0;
+ break;
+ default:
+ /* this really depends on the chip */
+ adev->gds.gds_compute_max_wave_id = 0x7ff;
+ break;
+ }
+
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return (~data) & mask;
+}
+
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ unsigned disable_masks[4 * 4];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ /*
+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
+ */
+ if (adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se > 16)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
+ gfx_v9_4_3_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
+ bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
+
+ /*
+ * The bitmap(and ao_cu_bitmap) in cu_info structure is
+ * 4x4 size array, and it's usually suitable for Vega
+ * ASICs which has 4*2 SE/SH layout.
+ * But for Arcturus, SE/SH layout is changed to 8*1.
+ * To mostly reduce the impact, we make it compatible
+ * with current bitmap array as below:
+ * SE4,SH0 --> bitmap[0][1]
+ * SE5,SH0 --> bitmap[1][1]
+ * SE6,SH0 --> bitmap[2][1]
+ * SE7,SH0 --> bitmap[3][1]
+ */
+ cu_info->bitmap[i % 4][j + i / 4] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 9,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &gfx_v9_4_3_ip_funcs,
+};
+
+static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask;
+ int i, r;
+
+ /* TODO : Initialize golden regs */
+ /* gfx_v9_4_3_init_golden_registers(adev); */
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+ }
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for_each_inst(i, inst_mask)
+ gfx_v9_4_3_xcc_fini(adev, i);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
+ .suspend = &gfx_v9_4_3_xcp_suspend,
+ .resume = &gfx_v9_4_3_xcp_resume
+};
+
+struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status,
+ .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_3_ras_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
index 84e69701b81a..42d67ee0e7ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
@@ -24,7 +24,8 @@
#ifndef __GFX_V9_4_3_H__
#define __GFX_V9_4_3_H__
-extern const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs;
-extern const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs;
+extern const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs;
#endif /* __GFX_V9_4_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index ab2325f6c7ac..d94cc1ec7242 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -40,7 +40,7 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -247,7 +247,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@@ -307,7 +307,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned i;
for (i = 0 ; i < 18; ++i) {
@@ -338,7 +338,7 @@ static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -411,7 +411,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
static void gfxhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index c59c6c85fbff..4dabf910334b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -21,6 +21,7 @@
*
*/
#include "amdgpu.h"
+#include "amdgpu_xcp.h"
#include "gfxhub_v1_2.h"
#include "gfxhub_v1_1.h"
@@ -35,227 +36,288 @@
static u64 gfxhub_v1_2_get_mc_fb_offset(struct amdgpu_device *adev)
{
- return (u64)RREG32_SOC15(GC, 0, regMC_VM_FB_OFFSET) << 24;
+ return (u64)RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v1_2_xcc_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
}
static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
-
- WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- hub->ctx_addr_distance * vmid,
- lower_32_bits(page_table_base));
+ uint32_t xcc_mask;
- WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- hub->ctx_addr_distance * vmid,
- upper_32_bits(page_table_base));
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, xcc_mask);
}
-static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
uint64_t pt_base;
+ int i;
if (adev->gmc.pdb0_bo)
pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
else
pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
- gfxhub_v1_2_setup_vm_pt_regs(adev, 0, pt_base);
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, 0, pt_base, xcc_mask);
/* If use GART for FB translation, vmid0 page table covers both
* vram and system memory (gart)
*/
- if (adev->gmc.pdb0_bo) {
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.fb_start >> 12));
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.fb_start >> 44));
-
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->gmc.gart_end >> 12));
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->gmc.gart_end >> 44));
- } else {
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.gart_start >> 12));
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.gart_start >> 44));
-
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->gmc.gart_end >> 12));
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->gmc.gart_end >> 44));
+ for_each_inst(i, xcc_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ } else {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
}
}
-static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev)
+static void
+gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
uint64_t value;
uint32_t tmp;
+ int i;
- /* Program the AGP BAR */
- WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_BASE, 0);
- WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
- if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
- /* Program the system aperture low logical page number. */
- WREG32_SOC15_RLC(GC, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
- /*
- * Raven2 has a HW issue that it is unable to use the
- * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
- * So here is the workaround that increase system
- * aperture high address (add 1) to get rid of the VM
- * fault and hardware hang.
- */
- WREG32_SOC15_RLC(GC, 0,
- regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max((adev->gmc.fb_end >> 18) + 0x1,
- adev->gmc.agp_end >> 18));
- else
- WREG32_SOC15_RLC(GC, 0,
- regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
-
- /* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
- WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- (u32)(value >> 12));
- WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- (u32)(value >> 44));
-
- /* Program "protection fault". */
- WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page_addr >> 44));
-
- tmp = RREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL2);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
- WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
- }
-
- /* In the case squeezing vram into GART aperture, we don't use
- * FB aperture and AGP aperture. Disable them.
- */
- if (adev->gmc.pdb0_bo) {
- WREG32_SOC15(GC, 0, regMC_VM_FB_LOCATION_TOP, 0);
- WREG32_SOC15(GC, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
- WREG32_SOC15(GC, 0, regMC_VM_AGP_TOP, 0);
- WREG32_SOC15(GC, 0, regMC_VM_AGP_BOT, 0xFFFFFF);
- WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
- WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ for_each_inst(i, xcc_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increase system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
}
}
-static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_init_tlb_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
uint32_t tmp;
+ int i;
- /* Setup TLB control */
- tmp = RREG32_SOC15(GC, 0, regMC_VM_MX_L1_TLB_CNTL);
-
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- ENABLE_ADVANCED_DRIVER_MODEL, 1);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- MTYPE, MTYPE_UC);/* XXX for emulation. */
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
-
- WREG32_SOC15_RLC(GC, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ for_each_inst(i, xcc_mask) {
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
}
-static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_init_cache_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
uint32_t tmp;
+ int i;
- /* Setup L2 cache */
- tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
- /* XXX for emulation, Refer to closed source code.*/
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
- 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
- WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL, tmp);
-
- tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL2);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
- WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL2, tmp);
-
- tmp = regVM_L2_CNTL3_DEFAULT;
- if (adev->gmc.translate_further) {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
- L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
- } else {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
- L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ for_each_inst(i, xcc_mask) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL4, tmp);
}
- WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL3, tmp);
-
- tmp = regVM_L2_CNTL4_DEFAULT;
- if (adev->gmc.xgmi.connected_to_cpu) {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
- } else {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
- }
- WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL4, tmp);
}
-static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_enable_system_domain(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
uint32_t tmp;
+ int i;
- tmp = RREG32_SOC15(GC, 0, regVM_CONTEXT0_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
- adev->gmc.vmid0_page_table_depth);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
- adev->gmc.vmid0_page_table_block_size);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
- RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
- WREG32_SOC15(GC, 0, regVM_CONTEXT0_CNTL, tmp);
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL, tmp);
+ }
}
-static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev)
+static void
+gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
- WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
- 0XFFFFFFFF);
- WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
- 0x0000000F);
-
- WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
- 0);
- WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
- 0);
-
- WREG32_SOC15(GC, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
- WREG32_SOC15(GC, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ int i;
+ for_each_inst(i, xcc_mask) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
}
-static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub;
unsigned num_level, block_size;
uint32_t tmp;
- int i;
+ int i, j;
num_level = adev->vm_manager.num_level;
block_size = adev->vm_manager.block_size;
@@ -264,124 +326,205 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev)
else
block_size -= 9;
- for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT1_CNTL, i);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
- num_level);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
- 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- PAGE_TABLE_BLOCK_SIZE,
- block_size);
- /* Send no-retry XNACK on fault to suppress VM fault storm.
- * On Aldebaran, XNACK can be enabled in the SQ per-process.
- * Retry faults need to be enabled for that to work.
- */
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
- !adev->gmc.noretry ||
- adev->asic_type == CHIP_ALDEBARAN);
- WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT1_CNTL,
- i * hub->ctx_distance, tmp);
- WREG32_SOC15_OFFSET(GC, 0,
- regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
- i * hub->ctx_addr_distance, 0);
- WREG32_SOC15_OFFSET(GC, 0,
- regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
- i * hub->ctx_addr_distance, 0);
- WREG32_SOC15_OFFSET(GC, 0,
- regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
- i * hub->ctx_addr_distance,
- lower_32_bits(adev->vm_manager.max_pfn - 1));
- WREG32_SOC15_OFFSET(GC, 0,
- regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
- i * hub->ctx_addr_distance,
- upper_32_bits(adev->vm_manager.max_pfn - 1));
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On 9.4.2 and 9.4.3, XNACK can be enabled in
+ * the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
}
}
-static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- unsigned i;
-
- for (i = 0 ; i < 18; ++i) {
- WREG32_SOC15_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
- i * hub->eng_addr_distance, 0xffffffff);
- WREG32_SOC15_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
- i * hub->eng_addr_distance, 0x1f);
+ struct amdgpu_vmhub *hub;
+ unsigned int i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
}
}
-static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev)
+static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
{
- if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
- /*
- * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
- * VF copy registers so vbios post doesn't program them, for
- * SRIOV driver need to program them
- */
- WREG32_SOC15_RLC(GC, 0, regMC_VM_FB_LOCATION_BASE,
- adev->gmc.vram_start >> 24);
- WREG32_SOC15_RLC(GC, 0, regMC_VM_FB_LOCATION_TOP,
- adev->gmc.vram_end >> 24);
+ uint32_t tmp_mask;
+ int i;
+
+ tmp_mask = xcc_mask;
+ /*
+ * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ for_each_inst(i, tmp_mask) {
+ i = ffs(tmp_mask) - 1;
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
}
/* GART Enable. */
- gfxhub_v1_2_init_gart_aperture_regs(adev);
- gfxhub_v1_2_init_system_aperture_regs(adev);
- gfxhub_v1_2_init_tlb_regs(adev);
+ gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_tlb_regs(adev, xcc_mask);
if (!amdgpu_sriov_vf(adev))
- gfxhub_v1_2_init_cache_regs(adev);
+ gfxhub_v1_2_xcc_init_cache_regs(adev, xcc_mask);
- gfxhub_v1_2_enable_system_domain(adev);
+ gfxhub_v1_2_xcc_enable_system_domain(adev, xcc_mask);
if (!amdgpu_sriov_vf(adev))
- gfxhub_v1_2_disable_identity_aperture(adev);
- gfxhub_v1_2_setup_vmid_config(adev);
- gfxhub_v1_2_program_invalidation(adev);
+ gfxhub_v1_2_xcc_disable_identity_aperture(adev, xcc_mask);
+ gfxhub_v1_2_xcc_setup_vmid_config(adev, xcc_mask);
+ gfxhub_v1_2_xcc_program_invalidation(adev, xcc_mask);
return 0;
}
+static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ return gfxhub_v1_2_xcc_gart_enable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ }
+}
+
static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_gart_disable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value,
+ uint32_t xcc_mask)
+{
u32 tmp;
- u32 i;
-
- /* Disable all tables */
- for (i = 0; i < 16; i++)
- WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_CNTL,
- i * hub->ctx_distance, 0);
-
- /* Setup TLB control */
- tmp = RREG32_SOC15(GC, 0, regMC_VM_MX_L1_TLB_CNTL);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
- tmp = REG_SET_FIELD(tmp,
- MC_VM_MX_L1_TLB_CNTL,
- ENABLE_ADVANCED_DRIVER_MODEL,
- 0);
- WREG32_SOC15_RLC(GC, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
-
- /* Setup L2 cache */
- tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32_SOC15(GC, 0, regVM_L2_CNTL, tmp);
- WREG32_SOC15(GC, 0, regVM_L2_CNTL3, 0);
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
}
/**
@@ -393,72 +536,100 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
- u32 tmp;
- tmp = RREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp,
- VM_L2_PROTECTION_FAULT_CNTL,
- TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
- value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- if (!value) {
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- CRASH_ON_NO_RETRY_FAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- CRASH_ON_RETRY_FAULT, 1);
- }
- WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, xcc_mask);
}
-static void gfxhub_v1_2_init(struct amdgpu_device *adev)
+static void gfxhub_v1_2_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
- hub->ctx0_ptb_addr_lo32 =
- SOC15_REG_OFFSET(GC, 0,
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
- hub->ctx0_ptb_addr_hi32 =
- SOC15_REG_OFFSET(GC, 0,
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
- hub->vm_inv_eng0_sem =
- SOC15_REG_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_SEM);
- hub->vm_inv_eng0_req =
- SOC15_REG_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_REQ);
- hub->vm_inv_eng0_ack =
- SOC15_REG_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ACK);
- hub->vm_context0_cntl =
- SOC15_REG_OFFSET(GC, 0, regVM_CONTEXT0_CNTL);
- hub->vm_l2_pro_fault_status =
- SOC15_REG_OFFSET(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS);
- hub->vm_l2_pro_fault_cntl =
- SOC15_REG_OFFSET(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL);
-
- hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
- hub->ctx_addr_distance = regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
- regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
- hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - regVM_INVALIDATE_ENG0_REQ;
- hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
- regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL -
+ regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance =
+ regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
}
+static void gfxhub_v1_2_init(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_init(adev, xcc_mask);
+}
+
+static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+
+
+
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+ PF_LFB_REGION);
+
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
+ }
+
+ return 0;
+}
const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
.get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset,
@@ -467,5 +638,38 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
.gart_disable = gfxhub_v1_2_gart_disable,
.set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default,
.init = gfxhub_v1_2_init,
- .get_xgmi_info = gfxhub_v1_1_get_xgmi_info,
+ .get_xgmi_info = gfxhub_v1_2_get_xgmi_info,
+};
+
+static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, inst_mask);
+
+ if (!amdgpu_sriov_vf(adev))
+ return gfxhub_v1_2_xcc_gart_enable(adev, inst_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_gart_disable(adev, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs = {
+ .suspend = &gfxhub_v1_2_xcp_suspend,
+ .resume = &gfxhub_v1_2_xcp_resume
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
index e2d508f5a7ee..997e9f90c990 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
@@ -26,4 +26,6 @@
extern const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs;
+extern struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs;
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 9b3a02527318..f173a61c6c15 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -120,7 +120,7 @@ static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -282,7 +282,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
@@ -331,7 +331,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned i;
for (i = 0 ; i < 18; ++i) {
@@ -360,7 +360,7 @@ static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -433,7 +433,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = {
static void gfxhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 4aacbbec31e2..d8fc3e8088cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -123,7 +123,7 @@ static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -291,7 +291,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
@@ -340,7 +340,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned i;
for (i = 0 ; i < 18; ++i) {
@@ -381,7 +381,7 @@ static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -462,7 +462,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = {
static void gfxhub_v2_1_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
@@ -651,7 +651,7 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
int time = 1000;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
index 13712640fa46..c53147f9c9fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
@@ -119,7 +119,7 @@ static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -290,7 +290,7 @@ static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
@@ -339,7 +339,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned i;
for (i = 0 ; i < 18; ++i) {
@@ -380,7 +380,7 @@ static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -463,7 +463,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = {
static void gfxhub_v3_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
index 6e0bd628c889..ae777487d72e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
@@ -122,7 +122,7 @@ static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -295,7 +295,7 @@ static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
@@ -344,7 +344,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev)
static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned i;
for (i = 0 ; i < 18; ++i) {
@@ -373,7 +373,7 @@ static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev)
static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
@@ -451,7 +451,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = {
static void gfxhub_v3_0_3_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index b213dcf8ca06..0c8a47989576 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -76,7 +76,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
/* GFX HUB */
/* This works because this interrupt is only
* enabled at init/resume and disabled in
@@ -84,11 +84,11 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* change over the course of suspend/resume.
*/
if (!adev->in_s0ix)
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
/* GFX HUB */
/* This works because this interrupt is only
* enabled at init/resume and disabled in
@@ -96,7 +96,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* change over the course of suspend/resume.
*/
if (!adev->in_s0ix)
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
break;
default:
break;
@@ -139,7 +139,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Try to handle the recoverable page faults by filling page
* tables
*/
- if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault))
return 1;
}
@@ -149,7 +149,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
(adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
RREG32(hub->vm_l2_pro_fault_status);
@@ -212,8 +212,7 @@ static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- return ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
(!amdgpu_sriov_vf(adev)));
}
@@ -249,7 +248,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int i;
unsigned char hub_ip = 0;
- hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
@@ -284,7 +283,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
* Issue a dummy read to wait for the ACK register to be cleared
* to avoid a false ACK due to the new fast GRBM interface.
*/
- if ((vmhub == AMDGPU_GFXHUB_0) &&
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
(adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
hub->eng_distance * eng, hub_ip);
@@ -343,7 +342,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
- if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
down_read_trylock(&adev->reset_domain->sem)) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
@@ -361,19 +360,19 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
mutex_lock(&adev->mman.gtt_window_lock);
- if (vmhub == AMDGPU_MMHUB_0) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+ if (vmhub == AMDGPU_MMHUB0(0)) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
- BUG_ON(vmhub != AMDGPU_GFXHUB_0);
+ BUG_ON(vmhub != AMDGPU_GFXHUB(0));
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
amdgpu_in_reset(adev) ||
ring->sched.ready == false) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
@@ -383,7 +382,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* translation. Avoid this by doing the invalidation from the SDMA
* itself.
*/
- r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
&job);
@@ -415,12 +414,13 @@ error_alloc:
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+ bool all_hub, uint32_t inst)
{
int vmid, i;
signed long r;
@@ -428,11 +428,11 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried_pasid;
bool ret;
u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
@@ -440,12 +440,12 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r) {
amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
return -ETIME;
}
amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
@@ -461,12 +461,12 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
gmc_v10_0_flush_gpu_tlb(adev, vmid,
i, flush_type);
} else {
gmc_v10_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
+ AMDGPU_GFXHUB(0), flush_type);
}
if (!adev->enable_mes)
break;
@@ -534,7 +534,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
if (ring->is_mes_queue)
return;
- if (ring->vm_hub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -929,7 +929,8 @@ static int gmc_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
- adev->num_vmhubs = 2;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
@@ -1075,9 +1076,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (!adev->in_s0ix)
adev->gfxhub.funcs->set_fault_enable_default(adev, value);
adev->mmhub.funcs->set_fault_enable_default(adev, value);
- gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
if (!adev->in_s0ix)
- gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 4116c112e8a0..c571f0d95994 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -64,7 +64,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
/* GFX HUB */
/* This works because this interrupt is only
* enabled at init/resume and disabled in
@@ -72,11 +72,11 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* change over the course of suspend/resume.
*/
if (!adev->in_s0ix)
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
/* MM HUB */
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
/* GFX HUB */
/* This works because this interrupt is only
* enabled at init/resume and disabled in
@@ -84,7 +84,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* change over the course of suspend/resume.
*/
if (!adev->in_s0ix)
- amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true);
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
break;
default:
break;
@@ -110,7 +110,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if (entry->vmid_src == AMDGPU_GFXHUB_0)
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
@@ -170,7 +170,7 @@ static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev)
static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- return ((vmhub == AMDGPU_MMHUB_0) &&
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
(!amdgpu_sriov_vf(adev)));
}
@@ -202,7 +202,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int i;
unsigned char hub_ip = 0;
- hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
@@ -251,7 +251,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
hub->eng_distance * eng, 0, hub_ip);
/* Issue additional private vm invalidation to MMHUB */
- if ((vmhub != AMDGPU_GFXHUB_0) &&
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
(hub->vm_l2_bank_select_reserved_cid2) &&
!amdgpu_sriov_vf(adev)) {
inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
@@ -284,7 +284,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
- if ((vmhub == AMDGPU_GFXHUB_0) && !adev->gfx.is_poweron)
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
return;
/* flush hdp cache */
@@ -293,7 +293,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
- if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) &&
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@@ -319,23 +319,24 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+ bool all_hub, uint32_t inst)
{
int vmid, i;
signed long r;
uint32_t seq;
uint16_t queried_pasid;
bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
/* 2 dwords flush + 8 dwords fence */
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
@@ -343,12 +344,12 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r) {
amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
return -ETIME;
}
amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
@@ -364,12 +365,12 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
gmc_v11_0_flush_gpu_tlb(adev, vmid,
i, flush_type);
} else {
gmc_v11_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
+ AMDGPU_GFXHUB(0), flush_type);
}
}
}
@@ -435,7 +436,7 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
if (ring->is_mes_queue)
return;
- if (ring->vm_hub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
@@ -779,7 +780,8 @@ static int gmc_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
- adev->num_vmhubs = 2;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size,
@@ -886,7 +888,7 @@ static int gmc_v11_0_sw_fini(void *handle)
static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32(hub->vm_contexts_disable, 0);
return;
@@ -921,7 +923,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
false : true;
adev->mmhub.funcs->set_fault_enable_default(adev, value);
- gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
+ gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b7dad4e67813..aa754c95a0b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -808,7 +808,7 @@ static int gmc_v6_0_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 402960b0174e..acd2b407860f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -419,12 +419,13 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
* @pasid: pasid to be flush
* @flush_type: type of flush
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+ bool all_hub, uint32_t inst)
{
int vmid;
unsigned int tmp;
@@ -977,7 +978,7 @@ static int gmc_v7_0_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 504c1b34dab7..85dead2a5702 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -617,12 +617,13 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* @pasid: pasid to be flush
* @flush_type: type of flush
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+ bool all_hub, uint32_t inst)
{
int vmid;
unsigned int tmp;
@@ -1093,7 +1094,7 @@ static int gmc_v8_0_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->num_vmhubs = 1;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2fe21cefd772..67e669e0141c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -79,6 +79,7 @@
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
+#define MAX_MEM_RANGES 8
static const char *gfxhub_client_ids[] = {
"CB",
@@ -481,7 +482,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- for (j = 0; j < adev->num_vmhubs; j++) {
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
@@ -491,25 +492,25 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* fini/suspend, so the overall state doesn't
* change over the course of suspend/resume.
*/
- if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0))
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
continue;
- if (j == AMDGPU_GFXHUB_0)
- tmp = RREG32_SOC15_IP(GC, reg);
- else
+ if (j >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_SOC15_IP(GC, reg);
tmp &= ~bits;
- if (j == AMDGPU_GFXHUB_0)
- WREG32_SOC15_IP(GC, reg, tmp);
- else
+ if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_SOC15_IP(GC, reg, tmp);
}
}
break;
case AMDGPU_IRQ_STATE_ENABLE:
- for (j = 0; j < adev->num_vmhubs; j++) {
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
@@ -519,20 +520,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* fini/suspend, so the overall state doesn't
* change over the course of suspend/resume.
*/
- if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0))
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
continue;
- if (j == AMDGPU_GFXHUB_0)
- tmp = RREG32_SOC15_IP(GC, reg);
- else
+ if (j >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_SOC15_IP(GC, reg);
tmp |= bits;
- if (j == AMDGPU_GFXHUB_0)
- WREG32_SOC15_IP(GC, reg, tmp);
- else
+ if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_SOC15_IP(GC, reg, tmp);
}
}
break;
@@ -556,11 +557,31 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
const char *hub_name;
u64 addr;
uint32_t cam_index = 0;
- int ret;
+ int ret, xcc_id = 0;
+ uint32_t node_id;
+
+ node_id = entry->node_id;
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
+ hub_name = "mmhub0";
+ hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)];
+ } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
+ hub_name = "mmhub1";
+ hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
+ } else {
+ hub_name = "gfxhub0";
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+ node_id);
+ if (xcc_id < 0)
+ xcc_id = 0;
+ }
+ hub = &adev->vmhub[xcc_id];
+ }
+
if (retry_fault) {
if (adev->irq.retry_cam_enabled) {
/* Delegate it to a different ring if the hardware hasn't
@@ -573,7 +594,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
cam_index = entry->src_data[2] & 0x3ff;
- ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault);
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, write_fault);
WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
if (ret)
return 1;
@@ -595,7 +617,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
/* Try to handle the recoverable page faults by filling page
* tables
*/
- if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, write_fault))
return 1;
}
}
@@ -603,16 +626,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
if (!printk_ratelimit())
return 0;
- if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
- hub_name = "mmhub0";
- hub = &adev->vmhub[AMDGPU_MMHUB_0];
- } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
- hub_name = "mmhub1";
- hub = &adev->vmhub[AMDGPU_MMHUB_1];
- } else {
- hub_name = "gfxhub0";
- hub = &adev->vmhub[AMDGPU_GFXHUB_0];
- }
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
@@ -628,6 +641,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
+ node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
+ node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
+
if (amdgpu_sriov_vf(adev))
return 0;
@@ -636,7 +654,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
* be updated to avoid reading an incorrect value due to
* the new fast GRBM interface.
*/
- if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
(adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
RREG32(hub->vm_l2_pro_fault_status);
@@ -645,11 +663,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
-
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
+ if (entry->vmid_src == AMDGPU_GFXHUB(0)) {
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
gfxhub_client_ids[cid],
@@ -759,8 +776,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
return false;
- return ((vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) &&
+ return ((vmhub == AMDGPU_MMHUB0(0) ||
+ vmhub == AMDGPU_MMHUB1(0)) &&
(!amdgpu_sriov_vf(adev)) &&
(!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
(adev->apu_flags & AMD_APU_IS_PICASSO))));
@@ -803,7 +820,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
u32 j, inv_req, inv_req2, tmp;
struct amdgpu_vmhub *hub;
- BUG_ON(vmhub >= adev->num_vmhubs);
+ BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
hub = &adev->vmhub[vmhub];
if (adev->gmc.xgmi.num_physical_nodes &&
@@ -816,6 +833,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
*/
inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ } else if (flush_type == 2 &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
+ adev->rev_id == 0) {
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, 0);
+ inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
} else {
inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
inv_req2 = 0;
@@ -824,7 +846,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
* as GFXOFF under bare metal
*/
- if (adev->gfx.kiq.ring.sched.ready &&
+ if (adev->gfx.kiq[0].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
down_read_trylock(&adev->reset_domain->sem)) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
@@ -849,11 +871,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
- if (vmhub == AMDGPU_GFXHUB_0)
- tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
- else
+ if (vmhub >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
-
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
if (tmp & 0x1)
break;
udelay(1);
@@ -864,27 +885,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
do {
- if (vmhub == AMDGPU_GFXHUB_0)
- WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
- else
+ if (vmhub >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
/*
* Issue a dummy read to wait for the ACK register to
* be cleared to avoid a false ACK due to the new fast
* GRBM interface.
*/
- if ((vmhub == AMDGPU_GFXHUB_0) &&
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
(adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
RREG32_NO_KIQ(hub->vm_inv_eng0_req +
hub->eng_distance * eng);
for (j = 0; j < adev->usec_timeout; j++) {
- if (vmhub == AMDGPU_GFXHUB_0)
- tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
- else
+ if (vmhub >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
-
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -900,10 +920,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
- if (vmhub == AMDGPU_GFXHUB_0)
- WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
- else
+ if (vmhub >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
}
spin_unlock(&adev->gmc.invalidate_lock);
@@ -921,12 +941,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* @pasid: pasid to be flush
* @flush_type: the flush type
* @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
*
* Flush the TLB for the requested pasid.
*/
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
- bool all_hub)
+ bool all_hub, uint32_t inst)
{
int vmid, i;
signed long r;
@@ -934,8 +955,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried_pasid;
bool ret;
u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
if (amdgpu_in_reset(adev))
return -EIO;
@@ -955,24 +976,31 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
if (vega20_xgmi_wa)
ndw += kiq->pmf->invalidate_tlbs_size;
- spin_lock(&adev->gfx.kiq.ring_lock);
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
/* 2 dwords flush + 8 dwords fence */
amdgpu_ring_alloc(ring, ndw);
if (vega20_xgmi_wa)
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, 2, all_hub);
+
+ if (flush_type == 2 &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
+ adev->rev_id == 0)
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, 0, all_hub);
+
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r) {
amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
up_read(&adev->reset_domain->sem);
return -ETIME;
}
amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
@@ -989,12 +1017,12 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
- for (i = 0; i < adev->num_vmhubs; i++)
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
gmc_v9_0_flush_gpu_tlb(adev, vmid,
i, flush_type);
} else {
gmc_v9_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, flush_type);
+ AMDGPU_GFXHUB(0), flush_type);
}
break;
}
@@ -1060,10 +1088,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
uint32_t reg;
/* Do nothing because there's no lut register for mmhub1. */
- if (ring->vm_hub == AMDGPU_MMHUB_1)
+ if (ring->vm_hub == AMDGPU_MMHUB1(0))
return;
- if (ring->vm_hub == AMDGPU_GFXHUB_0)
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -1159,13 +1187,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
- unsigned int mtype;
+ struct amdgpu_vm *vm = mapping->bo_va->base.vm;
+ unsigned int mtype_local, mtype;
bool snoop = false;
+ bool is_local;
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
- case IP_VERSION(9, 4, 3):
if (is_vram) {
if (bo_adev == adev) {
if (uncached)
@@ -1200,6 +1229,43 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
snoop = true;
}
break;
+ case IP_VERSION(9, 4, 3):
+ /* Only local VRAM BOs or system memory on non-NUMA APUs
+ * can be assumed to be local in their entirety. Choose
+ * MTYPE_NC as safe fallback for all system memory BOs on
+ * NUMA systems. Their MTYPE can be overridden per-page in
+ * gmc_v9_0_override_vm_pte_flags.
+ */
+ mtype_local = MTYPE_RW;
+ if (amdgpu_mtype_local == 1) {
+ DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+ mtype_local = MTYPE_NC;
+ } else if (amdgpu_mtype_local == 2) {
+ DRM_INFO_ONCE("Using MTYPE_CC for local memory\n");
+ mtype_local = MTYPE_CC;
+ } else {
+ DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ }
+ is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
+ num_possible_nodes() <= 1) ||
+ (is_vram && adev == bo_adev &&
+ KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id);
+ snoop = true;
+ if (uncached) {
+ mtype = MTYPE_UC;
+ } else if (adev->flags & AMD_IS_APU) {
+ mtype = is_local ? mtype_local : MTYPE_NC;
+ } else {
+ /* dGPU */
+ if (is_local)
+ mtype = mtype_local;
+ else if (is_vram)
+ mtype = MTYPE_NC;
+ else
+ mtype = MTYPE_UC;
+ }
+
+ break;
default:
if (uncached || coherent)
mtype = MTYPE_UC;
@@ -1241,6 +1307,72 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
mapping, flags);
}
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags)
+{
+ int local_node, nid;
+
+ /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+ * memory can use more efficient MTYPEs.
+ */
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+ return;
+
+ /* Only direct-mapped memory allows us to determine the NUMA node from
+ * the DMA address.
+ */
+ if (!adev->ram_is_direct_mapped) {
+ dev_dbg(adev->dev, "RAM is not direct mapped\n");
+ return;
+ }
+
+ /* Only override mappings with MTYPE_NC, which is the safe default for
+ * cacheable memory.
+ */
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
+ dev_dbg(adev->dev, "MTYPE is not NC\n");
+ return;
+ }
+
+ /* FIXME: Only supported on native mode for now. For carve-out, the
+ * NUMA affinity of the GPU/VM needs to come from the PCI info because
+ * memory partitions are not associated with different NUMA nodes.
+ */
+ if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
+ local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
+ } else {
+ dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+ return;
+ }
+
+ /* Only handle real RAM. Mappings of PCIe resources don't have struct
+ * page or NUMA nodes.
+ */
+ if (!page_is_ram(addr >> PAGE_SHIFT)) {
+ dev_dbg(adev->dev, "Page is not RAM.\n");
+ return;
+ }
+ nid = pfn_to_nid(addr >> PAGE_SHIFT);
+ dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+ vm->mem_id, local_node, nid);
+ if (nid == local_node) {
+ uint64_t old_flags = *flags;
+ unsigned int mtype_local = MTYPE_RW;
+
+ if (amdgpu_mtype_local == 1)
+ mtype_local = MTYPE_NC;
+ else if (amdgpu_mtype_local == 2)
+ mtype_local = MTYPE_CC;
+
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(mtype_local);
+ dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
+ old_flags, *flags);
+ }
+}
+
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
@@ -1283,6 +1415,27 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
+static enum amdgpu_memory_partition
+gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+ enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ if (adev->nbio.funcs->get_memory_partition_mode)
+ mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+ supp_modes);
+
+ return mode;
+}
+
+static enum amdgpu_memory_partition
+gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_NPS1_PARTITION_MODE;
+
+ return gmc_v9_0_get_memory_partition(adev, NULL);
+}
+
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
@@ -1291,7 +1444,9 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.map_mtype = gmc_v9_0_map_mtype,
.get_vm_pde = gmc_v9_0_get_vm_pde,
.get_vm_pte = gmc_v9_0_get_vm_pte,
+ .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
+ .query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1372,6 +1527,9 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
adev->mmhub.ras = &mmhub_v1_7_ras;
break;
+ case IP_VERSION(1, 8, 0):
+ adev->mmhub.ras = &mmhub_v1_8_ras;
+ break;
default:
/* mmhub ras is not available */
break;
@@ -1419,9 +1577,13 @@ static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
- if (adev->asic_type == CHIP_VEGA20 ||
- adev->asic_type == CHIP_ARCTURUS)
+ /*
+ * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
+ * in their IP discovery tables
+ */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
adev->gmc.xgmi.supported = true;
if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
@@ -1430,6 +1592,20 @@ static int gmc_v9_0_early_init(void *handle)
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
}
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ enum amdgpu_pkg_type pkg_type =
+ adev->smuio.funcs->get_pkg_type(adev);
+ /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
+ * and the APU, can be in used two possible modes:
+ * - carveout mode
+ * - native APU mode
+ * "is_app_apu" can be used to identify the APU in the native
+ * mode.
+ */
+ adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU &&
+ !pci_resource_len(adev->pdev, 0));
+ }
+
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
gmc_v9_0_set_umc_funcs(adev);
@@ -1525,8 +1701,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
int r;
/* size in MB on si */
- adev->gmc.mc_vram_size =
- adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ if (!adev->gmc.is_app_apu) {
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ } else {
+ DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+ adev->gmc.mc_vram_size = 0;
+ }
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU) &&
@@ -1551,7 +1732,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
*/
/* check whether both host-gpu and gpu-gpu xgmi links exist */
- if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
+ if ((!amdgpu_sriov_vf(adev) &&
+ (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
(adev->gmc.xgmi.supported &&
adev->gmc.xgmi.connected_to_cpu)) {
adev->gmc.aper_base =
@@ -1618,12 +1800,18 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
- r = amdgpu_gart_table_vram_alloc(adev);
- if (r)
- return r;
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU\n");
+ r = amdgpu_gart_table_ram_alloc(adev);
+ if (r)
+ dev_err(adev->dev, "Failed to allocate GART in system memory\n");
+ } else {
+ r = amdgpu_gart_table_vram_alloc(adev);
+ if (r)
+ return r;
- if (adev->gmc.xgmi.connected_to_cpu) {
- r = amdgpu_gmc_pdb0_alloc(adev);
+ if (adev->gmc.xgmi.connected_to_cpu)
+ r = amdgpu_gmc_pdb0_alloc(adev);
}
return r;
@@ -1644,10 +1832,178 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
+static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = gmc_v9_0_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid)
+{
+ int i;
+
+ /* Check if node with id 'nid' is present in 'node_ids' array */
+ for (i = 0; i < num_ids; ++i)
+ if (node_ids[i] == nid)
+ return true;
+
+ return false;
+}
+
+static void
+gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ int num_ranges = 0, ret, mem_groups;
+ struct amdgpu_numa_info numa_info;
+ int node_ids[MAX_MEM_RANGES];
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ xcc_mask = (1U << num_xcc) - 1;
+ mem_groups = hweight32(adev->aid_mask);
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ if (ret)
+ continue;
+
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ if (gmc_v9_0_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+
+ /* If there is only partition, don't use entire size */
+ if (adev->gmc.num_mem_partitions == 1) {
+ mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
+ do_div(mem_ranges[0].size, mem_groups);
+ }
+}
+
+static void
+gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ enum amdgpu_memory_partition mode;
+ u32 start_addr = 0, size;
+ int i;
+
+ mode = gmc_v9_0_query_memory_partition(adev);
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 2;
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ if (adev->flags & AMD_IS_APU)
+ adev->gmc.num_mem_partitions = 3;
+ else
+ adev->gmc.num_mem_partitions = 4;
+ break;
+ default:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ }
+
+ size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
+ size /= adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ mem_ranges[i].range.fpfn = start_addr;
+ mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+ mem_ranges[i].range.lpfn = start_addr + size - 1;
+ start_addr += size;
+ }
+
+ /* Adjust the last one */
+ mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn =
+ (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+ mem_ranges[adev->gmc.num_mem_partitions - 1].size =
+ adev->gmc.real_vram_size -
+ ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn
+ << AMDGPU_GPU_PAGE_SHIFT);
+}
+
+static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kzalloc(
+ MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ /* TODO : Get the range from PSP/Discovery for dGPU */
+ if (adev->gmc.is_app_apu)
+ gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = gmc_v9_0_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_WARN(adev->dev,
+ "Mem ranges not matching with hardware config");
+ }
+
+ return 0;
+}
+
static int gmc_v9_0_sw_init(void *handle)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned long inst_mask = adev->aid_mask;
adev->gfxhub.funcs->init(adev);
@@ -1655,38 +2011,54 @@ static int gmc_v9_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- r = amdgpu_atomfirmware_get_vram_info(adev,
- &vram_width, &vram_type, &vram_vendor);
- if (amdgpu_sriov_vf(adev))
- /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
- * and DF related registers is not readable, seems hardcord is the
- * only way to set the correct vram_width
- */
- adev->gmc.vram_width = 2048;
- else if (amdgpu_emu_mode != 1)
- adev->gmc.vram_width = vram_width;
+ if (!(adev->bios) || adev->gmc.is_app_apu) {
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.is_app_apu) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+ } else {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ adev->gmc.vram_width = 64 * 64;
+ }
+ } else {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+ }
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (amdgpu_sriov_vf(adev))
+ /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+ * and DF related registers is not readable, seems hardcord is the
+ * only way to set the correct vram_width
+ */
+ adev->gmc.vram_width = 2048;
+ else if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = vram_width;
- if (!adev->gmc.vram_width) {
- int chansize, numchan;
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
- /* hbm memory channel size */
- if (adev->flags & AMD_IS_APU)
- chansize = 64;
- else
- chansize = 128;
- if (adev->df.funcs &&
- adev->df.funcs->get_hbm_channel_number) {
- numchan = adev->df.funcs->get_hbm_channel_number(adev);
- adev->gmc.vram_width = numchan * chansize;
+ /* hbm memory channel size */
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
+ if (adev->df.funcs &&
+ adev->df.funcs->get_hbm_channel_number) {
+ numchan = adev->df.funcs->get_hbm_channel_number(adev);
+ adev->gmc.vram_width = numchan * chansize;
+ }
}
- }
- adev->gmc.vram_type = vram_type;
- adev->gmc.vram_vendor = vram_vendor;
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+ }
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
- adev->num_vmhubs = 2;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
@@ -1702,9 +2074,8 @@ static int gmc_v9_0_sw_init(void *handle)
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 3, 0):
case IP_VERSION(9, 4, 2):
- case IP_VERSION(9, 4, 3):
- adev->num_vmhubs = 2;
-
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
* To fulfill 4-level page support,
@@ -1720,12 +2091,24 @@ static int gmc_v9_0_sw_init(void *handle)
adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 1):
- adev->num_vmhubs = 3;
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask);
/* Keep the vm size same with Vega20 */
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
+ case IP_VERSION(9, 4, 3):
+ bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
+ NUM_XCC(adev->gfx.xcc_mask));
+
+ inst_mask <<= AMDGPU_MMHUB0(0);
+ bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+ break;
default:
break;
}
@@ -1764,7 +2147,7 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44;
+ dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44;
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
if (r) {
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
@@ -1778,6 +2161,12 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_gmc_get_vbios_allocations(adev);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ r = gmc_v9_0_init_mem_ranges(adev);
+ if (r)
+ return r;
+ }
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -1810,6 +2199,9 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ amdgpu_gmc_sysfs_init(adev);
+
return 0;
}
@@ -1817,10 +2209,20 @@ static int gmc_v9_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ amdgpu_gmc_sysfs_fini(adev);
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
+
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- amdgpu_gart_table_vram_free(adev);
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU free\n");
+ amdgpu_gart_table_ram_free(adev);
+ } else {
+ amdgpu_gart_table_vram_free(adev);
+ }
amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
@@ -1946,8 +2348,8 @@ static int gmc_v9_0_hw_init(void *handle)
adev->gfxhub.funcs->set_fault_enable_default(adev, value);
adev->mmhub.funcs->set_fault_enable_default(adev, value);
}
- for (i = 0; i < adev->num_vmhubs; ++i) {
- if (adev->in_s0ix && (i == AMDGPU_GFXHUB_0))
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0)))
continue;
gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index a3076eb8af6a..77595e9622da 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -437,7 +437,7 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case 126:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -460,6 +460,7 @@ int jpeg_v1_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v1_0_set_dec_ring_funcs(adev);
jpeg_v1_0_set_irq_funcs(adev);
@@ -484,15 +485,15 @@ int jpeg_v1_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring = adev->jpeg.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch =
+ adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] =
SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
return 0;
@@ -509,7 +510,7 @@ void jpeg_v1_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec);
+ amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
}
/**
@@ -522,7 +523,7 @@ void jpeg_v1_0_sw_fini(void *handle)
*/
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
if (mode == 0) {
WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
@@ -579,7 +580,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs;
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
DRM_INFO("JPEG decode is enabled in VM mode\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 0eddf7c824a7..c25d4a07350b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -49,6 +49,7 @@ static int jpeg_v2_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v2_0_set_dec_ring_funcs(adev);
jpeg_v2_0_set_irq_funcs(adev);
@@ -83,18 +84,18 @@ static int jpeg_v2_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
0, AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
return 0;
}
@@ -129,7 +130,7 @@ static int jpeg_v2_0_sw_fini(void *handle)
static int jpeg_v2_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
@@ -312,7 +313,7 @@ static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev)
*/
static int jpeg_v2_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -729,7 +730,7 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -791,7 +792,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs;
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs;
DRM_INFO("JPEG decode is enabled in VM mode\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 73e0dc5a10cd..aadb74de52bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -60,6 +60,7 @@ static int jpeg_v2_5_early_init(void *handle)
u32 harvest;
int i;
+ adev->jpeg.num_jpeg_rings = 1;
adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
@@ -125,12 +126,12 @@ static int jpeg_v2_5_sw_init(void *handle)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
ring->use_doorbell = true;
if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
- ring->vm_hub = AMDGPU_MMHUB_1;
+ ring->vm_hub = AMDGPU_MMHUB1(0);
else
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
sprintf(ring->name, "jpeg_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
@@ -138,8 +139,8 @@ static int jpeg_v2_5_sw_init(void *handle)
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
}
r = amdgpu_jpeg_ras_sw_init(adev);
@@ -186,7 +187,7 @@ static int jpeg_v2_5_hw_init(void *handle)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
@@ -329,7 +330,7 @@ static int jpeg_v2_5_start(struct amdgpu_device *adev)
if (adev->jpeg.harvest_config & (1 << i))
continue;
- ring = &adev->jpeg.inst[i].ring_dec;
+ ring = adev->jpeg.inst[i].ring_dec;
/* disable anti hang mechanism */
WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
@@ -602,7 +603,7 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -719,10 +720,10 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
if (adev->jpeg.harvest_config & (1 << i))
continue;
if (adev->asic_type == CHIP_ARCTURUS)
- adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_5_dec_ring_vm_funcs;
else /* CHIP_ALDEBARAN */
- adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_6_dec_ring_vm_funcs;
- adev->jpeg.inst[i].ring_dec.me = i;
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->me = i;
DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 1c2292cc5f2c..79791379fc2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -64,6 +64,7 @@ static int jpeg_v3_0_early_init(void *handle)
}
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v3_0_set_dec_ring_funcs(adev);
jpeg_v3_0_set_irq_funcs(adev);
@@ -98,18 +99,18 @@ static int jpeg_v3_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
return 0;
}
@@ -144,7 +145,7 @@ static int jpeg_v3_0_sw_fini(void *handle)
static int jpeg_v3_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
@@ -330,7 +331,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev)
*/
static int jpeg_v3_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -527,7 +528,7 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -589,7 +590,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs;
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs;
DRM_INFO("JPEG decode is enabled in VM mode\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index a3d83c9f2c9a..a707d407fbd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -58,6 +58,7 @@ static int jpeg_v4_0_early_init(void *handle)
adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
jpeg_v4_0_set_dec_ring_funcs(adev);
jpeg_v4_0_set_irq_funcs(adev);
@@ -105,10 +106,10 @@ static int jpeg_v4_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1);
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
@@ -116,8 +117,8 @@ static int jpeg_v4_0_sw_init(void *handle)
if (r)
return r;
- adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
r = amdgpu_jpeg_ras_sw_init(adev);
if (r)
@@ -156,7 +157,7 @@ static int jpeg_v4_0_sw_fini(void *handle)
static int jpeg_v4_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (amdgpu_sriov_vf(adev)) {
@@ -364,7 +365,7 @@ static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev)
*/
static int jpeg_v4_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
@@ -442,7 +443,7 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
table_size = 0;
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
regUVD_LMI_JRBC_RB_64BIT_BAR_LOW),
@@ -687,7 +688,7 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case VCN_4_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
@@ -749,7 +750,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
- adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_dec_ring_vm_funcs;
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs;
DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
new file mode 100644
index 000000000000..ce2b22f7e4e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -0,0 +1,1074 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+enum jpeg_engin_status {
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
+ UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_4_0__SRCID__JPEG_DECODE,
+ VCN_4_0__SRCID__JPEG1_DECODE,
+ VCN_4_0__SRCID__JPEG2_DECODE,
+ VCN_4_0__SRCID__JPEG3_DECODE,
+ VCN_4_0__SRCID__JPEG4_DECODE,
+ VCN_4_0__SRCID__JPEG5_DECODE,
+ VCN_4_0__SRCID__JPEG6_DECODE,
+ VCN_4_0__SRCID__JPEG7_DECODE
+};
+
+/**
+ * jpeg_v4_0_3_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v4_0_3_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+
+ jpeg_v4_0_3_set_dec_ring_funcs(adev);
+ jpeg_v4_0_3_set_irq_funcs(adev);
+ jpeg_v4_0_3_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v4_0_3_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 9 * jpeg_inst;
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(
+ JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0,
+ (j ? (0x40 * j - 0xc80) : 0));
+ }
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v4_0_3_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_3_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v4_0_3_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ ring = adev->jpeg.inst[i].ring_dec;
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(
+ VCN, GET_INST(VCN, i),
+ regVCN_JPEG_DB_CTRL,
+ (ring->pipe ? (ring->pipe - 0x15) : 0),
+ ring->doorbell_index
+ << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+ DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v4_0_3_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ return ret;
+}
+
+/**
+ * jpeg_v4_0_3_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v4_0_3_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v4_0_3_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_3_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v4_0_3_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v4_0_3_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v4_0_3_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(
+ JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON
+ << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JPEG_POWER_STATUS),
+ 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_3_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
+
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
+ regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK << j,
+ ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(
+ JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(
+ JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(
+ JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
+{
+ int i, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_3_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(
+ JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_OFF
+ << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(
+ JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15_OFFSET(
+ JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ (ring->pipe ? (0x40 * ring->pipe - 0xc80) :
+ 0),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80004000);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00004000);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ if (ring->adev->jpeg.inst[ring->me].aid_id) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x4);
+ } else {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ if (ring->adev->jpeg.inst[ring->me].aid_id) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x0);
+ } else {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool jpeg_v4_0_3_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool ret = false;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
+
+ ret &= ((RREG32_SOC15_OFFSET(
+ JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS,
+ reg_offset) &
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v4_0_3_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
+
+ ret &= SOC15_WAIT_ON_RREG_OFFSET(
+ JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v4_0_3_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (enable) {
+ if (!jpeg_v4_0_3_is_idle(handle))
+ return -EBUSY;
+ jpeg_v4_0_3_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v4_0_3_disable_clock_gating(adev, i);
+ }
+ }
+ return 0;
+}
+
+static int jpeg_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_stop(adev);
+ else
+ ret = jpeg_v4_0_3_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_4_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_4_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_4_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_4_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_4_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_4_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_4_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
+ .name = "jpeg_v4_0_3",
+ .early_init = jpeg_v4_0_3_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v4_0_3_sw_init,
+ .sw_fini = jpeg_v4_0_3_sw_fini,
+ .hw_init = jpeg_v4_0_3_hw_init,
+ .hw_fini = jpeg_v4_0_3_hw_fini,
+ .suspend = jpeg_v4_0_3_suspend,
+ .resume = jpeg_v4_0_3_resume,
+ .is_idle = jpeg_v4_0_3_is_idle,
+ .wait_for_idle = jpeg_v4_0_3_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_3_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+ DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
+ .set = jpeg_v4_0_3_set_interrupt_state,
+ .process = jpeg_v4_0_3_process_interrupt,
+};
+
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+ }
+ adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 3,
+ .funcs = &jpeg_v4_0_3_ip_funcs,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
+};
+
+static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ /* jpeg v4_0_3 only support uncorrectable errors */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ NULL, 0, GET_INST(VCN, jpeg_inst),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ dev_warn(adev->dev, "JPEG RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+ jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ GET_INST(VCN, jpeg_inst));
+}
+
+static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ dev_warn(adev->dev, "JPEG RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+ jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
+ .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ },
+};
+
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v4_0_3_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
new file mode 100644
index 000000000000..22483dc66351
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_3_H__
+#define __JPEG_V4_0_3_H__
+
+#define regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x404d
+#define regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x404e
+#define regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x404f
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ab
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ac
+#define regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40a4
+#define regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40a6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40b6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40b7
+#define regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x42d4
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x42d5
+#define regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define regUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x4043
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
+
+#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 2e2062636d5f..36a123e6c8ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -149,7 +149,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
@@ -632,6 +632,8 @@ static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
+ memset(mqd, 0, sizeof(*mqd));
+
mqd->header = 0xC0310800;
mqd->compute_pipelinestat_enable = 0x00000001;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -728,6 +730,7 @@ static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
/* offset: 184 - this is used for CP_HQD_GFX_CONTROL */
mqd->cp_hqd_suspend_cntl_stack_offset = tmp;
+ amdgpu_device_flush_hdp(ring->adev, NULL);
return 0;
}
@@ -797,8 +800,8 @@ static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
int r;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
@@ -812,13 +815,7 @@ static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
- r = amdgpu_ring_test_ring(kiq_ring);
- if (r) {
- DRM_ERROR("kfq enable failed\n");
- kiq_ring->sched.ready = false;
- }
-
- return r;
+ return amdgpu_ring_test_helper(kiq_ring);
}
static int mes_v10_1_queue_init(struct amdgpu_device *adev)
@@ -863,9 +860,9 @@ static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- spin_lock_init(&adev->gfx.kiq.ring_lock);
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
ring->me = 3;
ring->pipe = 1;
@@ -891,7 +888,7 @@ static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
if (pipe == AMDGPU_MES_KIQ_PIPE)
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
ring = &adev->mes.ring;
else
@@ -901,6 +898,7 @@ static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
return 0;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
@@ -974,15 +972,15 @@ static int mes_v10_1_sw_fini(void *handle)
amdgpu_ucode_release(&adev->mes.fw[pipe]);
}
- amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
- &adev->gfx.kiq.ring.mqd_gpu_addr,
- &adev->gfx.kiq.ring.mqd_ptr);
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
&adev->mes.ring.mqd_gpu_addr,
&adev->mes.ring.mqd_ptr);
- amdgpu_ring_fini(&adev->gfx.kiq.ring);
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
amdgpu_ring_fini(&adev->mes.ring);
amdgpu_mes_fini(adev);
@@ -1038,7 +1036,7 @@ static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev)
mes_v10_1_enable(adev, true);
- mes_v10_1_kiq_setting(&adev->gfx.kiq.ring);
+ mes_v10_1_kiq_setting(&adev->gfx.kiq[0].ring);
r = mes_v10_1_queue_init(adev);
if (r)
@@ -1090,7 +1088,7 @@ static int mes_v10_1_hw_init(void *handle)
* MES uses KIQ ring exclusively so driver cannot access KIQ ring
* with MES enabled.
*/
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
adev->mes.ring.sched.ready = true;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 45280f047180..1bdaa00c0b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -164,7 +164,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
@@ -202,17 +202,14 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
+ mes_add_queue_pkt.trap_en = input->trap_en;
+ mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
mes_add_queue_pkt.gds_size = input->queue_size;
- if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) &&
- (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
- (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3))))
- mes_add_queue_pkt.trap_en = 1;
-
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
mes_add_queue_pkt.gds_size = input->queue_size;
@@ -339,6 +336,19 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
break;
+ case MES_MISC_OP_SET_SHADER_DEBUGGER:
+ misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+ misc_pkt.set_shader_debugger.process_context_addr =
+ input->set_shader_debugger.process_context_addr;
+ misc_pkt.set_shader_debugger.flags.u32all =
+ input->set_shader_debugger.flags.u32all;
+ misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+ input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+ memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+ input->set_shader_debugger.tcp_watch_cntl,
+ sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+ misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
+ break;
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
@@ -704,6 +714,8 @@ static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
+ memset(mqd, 0, sizeof(*mqd));
+
mqd->header = 0xC0310800;
mqd->compute_pipelinestat_enable = 0x00000001;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -797,6 +809,7 @@ static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+ amdgpu_device_flush_hdp(ring->adev, NULL);
return 0;
}
@@ -864,8 +877,8 @@ static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring)
static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
int r;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
@@ -879,12 +892,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
- r = amdgpu_ring_test_ring(kiq_ring);
- if (r) {
- DRM_ERROR("kfq enable failed\n");
- kiq_ring->sched.ready = false;
- }
- return r;
+ return amdgpu_ring_test_helper(kiq_ring);
}
static int mes_v11_0_queue_init(struct amdgpu_device *adev,
@@ -894,7 +902,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
int r;
if (pipe == AMDGPU_MES_KIQ_PIPE)
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
ring = &adev->mes.ring;
else
@@ -961,9 +969,9 @@ static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- spin_lock_init(&adev->gfx.kiq.ring_lock);
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
ring->me = 3;
ring->pipe = 1;
@@ -989,7 +997,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
if (pipe == AMDGPU_MES_KIQ_PIPE)
- ring = &adev->gfx.kiq.ring;
+ ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
ring = &adev->mes.ring;
else
@@ -999,6 +1007,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
return 0;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
@@ -1074,15 +1083,15 @@ static int mes_v11_0_sw_fini(void *handle)
amdgpu_ucode_release(&adev->mes.fw[pipe]);
}
- amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
- &adev->gfx.kiq.ring.mqd_gpu_addr,
- &adev->gfx.kiq.ring.mqd_ptr);
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
&adev->mes.ring.mqd_gpu_addr,
&adev->mes.ring.mqd_ptr);
- amdgpu_ring_fini(&adev->gfx.kiq.ring);
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
amdgpu_ring_fini(&adev->mes.ring);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
@@ -1175,7 +1184,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
mes_v11_0_enable(adev, true);
- mes_v11_0_kiq_setting(&adev->gfx.kiq.ring);
+ mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);
r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
if (r)
@@ -1196,7 +1205,7 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev)) {
- mes_v11_0_kiq_dequeue(&adev->gfx.kiq.ring);
+ mes_v11_0_kiq_dequeue(&adev->gfx.kiq[0].ring);
mes_v11_0_kiq_clear(adev);
}
@@ -1244,7 +1253,7 @@ static int mes_v11_0_hw_init(void *handle)
* MES uses KIQ ring exclusively so driver cannot access KIQ ring
* with MES enabled.
*/
- adev->gfx.kiq.ring.sched.ready = false;
+ adev->gfx.kiq[0].ring.sched.ready = false;
adev->mes.ring.sched.ready = true;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 15e7cbeae75b..fb91b31056ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -54,7 +54,7 @@ static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -229,7 +229,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@@ -285,7 +285,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -338,7 +338,7 @@ static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -415,7 +415,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v1_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 73afbf2facc9..9086f2fdfaf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -54,7 +54,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
@@ -261,7 +261,7 @@ static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@@ -319,7 +319,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -348,7 +348,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -425,7 +425,7 @@ static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v1_7_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 342d1702104c..5e8b493f8699 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -29,6 +29,7 @@
#include "soc15_common.h"
#include "soc15.h"
+#include "amdgpu_ras.h"
#define regVM_L2_CNTL3_DEFAULT 0x80100007
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
@@ -53,18 +54,30 @@ static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
-
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base));
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
}
static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base;
+ u32 inst_mask;
+ int i;
if (adev->gmc.pdb0_bo)
pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
@@ -76,187 +89,248 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
/* If use GART for FB translation, vmid0 page table covers both
* vram and system memory (gart)
*/
- if (adev->gmc.pdb0_bo) {
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.fb_start >> 12));
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.fb_start >> 44));
-
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->gmc.gart_end >> 12));
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->gmc.gart_end >> 44));
-
- } else {
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.gart_start >> 12));
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.gart_start >> 44));
-
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->gmc.gart_end >> 12));
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->gmc.gart_end >> 44));
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ } else {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
}
}
static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
{
+ uint32_t tmp, inst_mask;
uint64_t value;
- uint32_t tmp;
+ int i;
- /* Program the AGP BAR */
- WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BASE, 0);
- WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT,
+ adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
+ adev->gmc.agp_end >> 24);
- /* Program the system aperture low logical page number. */
- WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ if (amdgpu_sriov_vf(adev))
+ return;
- WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- /* In the case squeezing vram into GART aperture, we don't use
- * FB aperture and AGP aperture. Disable them.
- */
- if (adev->gmc.pdb0_bo) {
- WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, 0xFFFFFF);
- WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, 0);
- WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, 0);
- WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
- WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
- WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
- }
- if (amdgpu_sriov_vf(adev))
- return;
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
- /* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
- WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- (u32)(value >> 12));
- WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- (u32)(value >> 44));
-
- /* Program "protection fault". */
- WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page_addr >> 44));
-
- tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
- WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_BASE,
+ 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ 0x3FFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
}
static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
{
- uint32_t tmp;
+ uint32_t tmp, inst_mask;
+ int i;
/* Setup TLB control */
- tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
-
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- ENABLE_ADVANCED_DRIVER_MODEL, 1);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- MTYPE, MTYPE_UC);/* XXX for emulation. */
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
-
- WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
}
static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev)
{
- uint32_t tmp;
+ uint32_t tmp, inst_mask;
+ int i;
if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
- tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
- /* XXX for emulation, Refer to closed source code.*/
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
- 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp);
-
- tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2, tmp);
-
- tmp = regVM_L2_CNTL3_DEFAULT;
- if (adev->gmc.translate_further) {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
- L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
- } else {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
- L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
- }
- WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, tmp);
-
- tmp = regVM_L2_CNTL4_DEFAULT;
- if (adev->gmc.xgmi.connected_to_cpu) {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
- VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
- VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
- } else {
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
- VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
- VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp);
}
- WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL4, tmp);
}
static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev)
{
- uint32_t tmp;
-
- tmp = RREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
- adev->gmc.vmid0_page_table_depth);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
- adev->gmc.vmid0_page_table_block_size);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
- RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
- WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL, tmp);
+ uint32_t tmp, inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp,
+ VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL, tmp);
+ }
}
static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev)
{
+ u32 inst_mask;
+ int i;
+
if (amdgpu_sriov_vf(adev))
return;
- WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0xFFFFFFFF);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, 0x0000000F);
-
- WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
-
- WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
}
static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
- unsigned num_level, block_size;
- uint32_t tmp;
- int i;
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp, inst_mask;
+ int i, j;
num_level = adev->vm_manager.num_level;
block_size = adev->vm_manager.block_size;
@@ -265,60 +339,75 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
else
block_size -= 9;
- for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
- num_level);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
- 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- PAGE_TABLE_BLOCK_SIZE,
- block_size);
- /* On Aldebaran, XNACK can be enabled in the SQ per-process.
- * Retry faults need to be enabled for that to work.
- */
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
- 1);
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
- i * hub->ctx_distance, tmp);
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
- i * hub->ctx_addr_distance, 0);
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
- i * hub->ctx_addr_distance, 0);
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
- i * hub->ctx_addr_distance,
- lower_32_bits(adev->vm_manager.max_pfn - 1));
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
- i * hub->ctx_addr_distance,
- upper_32_bits(adev->vm_manager.max_pfn - 1));
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH, num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* On 9.4.3, XNACK can be enabled in the SQ
+ * per-process. Retry faults need to be enabled for
+ * that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1);
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
}
}
static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
- unsigned i;
-
- for (i = 0; i < 18; ++i) {
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
- i * hub->eng_addr_distance, 0xffffffff);
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
- i * hub->eng_addr_distance, 0x1f);
+ struct amdgpu_vmhub *hub;
+ u32 i, j, inst_mask;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
}
}
@@ -352,28 +441,34 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub;
u32 tmp;
- u32 i;
+ u32 i, j, inst_mask;
/* Disable all tables */
- for (i = 0; i < 16; i++)
- WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL,
- i * hub->ctx_distance, 0);
-
- /* Setup TLB control */
- tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- ENABLE_ADVANCED_DRIVER_MODEL, 0);
- WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
-
- if (!amdgpu_sriov_vf(adev)) {
- /* Setup L2 cache */
- tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp);
- WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, 0);
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE,
+ 0);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
+ }
}
}
@@ -385,73 +480,83 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
*/
static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
- u32 tmp;
+ u32 tmp, inst_mask;
+ int i;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
- value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- if (!value) {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- CRASH_ON_NO_RETRY_FAULT, 1);
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
- CRASH_ON_RETRY_FAULT, 1);
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
-
- WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static void mmhub_v1_8_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
-
- hub->ctx0_ptb_addr_lo32 =
- SOC15_REG_OFFSET(MMHUB, 0,
- regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
- hub->ctx0_ptb_addr_hi32 =
- SOC15_REG_OFFSET(MMHUB, 0,
- regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
- hub->vm_inv_eng0_req =
- SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_REQ);
- hub->vm_inv_eng0_ack =
- SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ACK);
- hub->vm_context0_cntl =
- SOC15_REG_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL);
- hub->vm_l2_pro_fault_status =
- SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_STATUS);
- hub->vm_l2_pro_fault_cntl =
- SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL);
-
- hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
- hub->ctx_addr_distance = regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
- regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
- hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - regVM_INVALIDATE_ENG0_REQ;
- hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
- regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+
+ hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
}
static int mmhub_v1_8_set_clockgating(struct amdgpu_device *adev,
@@ -475,3 +580,277 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
.set_clockgating = mmhub_v1_8_set_clockgating,
.get_clockgating = mmhub_v1_8_get_clockgating,
};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, regMMEA3_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = {
+ {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"},
+ {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"},
+ {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"},
+ {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"},
+ {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"},
+ {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"},
+ {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"},
+ {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"},
+ {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"},
+ {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"},
+ {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"},
+ {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"},
+ {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"},
+};
+
+static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &err_data->ce_count);
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_inst);
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_inst);
+}
+
+static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_error_count(adev, i);
+}
+
+static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
+ regMMEA0_ERR_STATUS,
+ regMMEA1_ERR_STATUS,
+ regMMEA2_ERR_STATUS,
+ regMMEA3_ERR_STATUS,
+ regMMEA4_ERR_STATUS,
+};
+
+static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ uint32_t reg_value;
+ uint32_t mmea_err_status_addr_dist;
+ uint32_t i;
+
+ /* query mmea ras err status */
+ mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist);
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
+ i, mmhub_inst, reg_value);
+ }
+ }
+
+ /* query mm_cane ras err status */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+ if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
+ mmhub_inst, reg_value);
+ }
+}
+
+static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_err_status(adev, i);
+}
+
+static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ uint32_t mmea_cgtt_clk_cntl_addr_dist;
+ uint32_t mmea_err_status_addr_dist;
+ uint32_t reg_value;
+ uint32_t i;
+
+ /* reset mmea ras err status */
+ mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
+ mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+ /* force clk branch on for response path
+ * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
+ */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+ SOFT_OVERRIDE_RETURN, 1);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist,
+ reg_value);
+
+ /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 1);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist,
+ reg_value);
+
+ /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+ SOFT_OVERRIDE_RETURN, 0);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist,
+ reg_value);
+ }
+
+ /* reset mm_cane ras err status
+ * force clk branch on for response path
+ * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
+ */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+ SOFT_OVERRIDE_ATRET, 1);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+
+ /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 1);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);
+
+ /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+ SOFT_OVERRIDE_ATRET, 0);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+}
+
+static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_err_status(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
+ .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_8_ras_hw_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
index 0bb36200e4e5..126f0075ac50 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
@@ -24,5 +24,6 @@
#define __MMHUB_V1_8_H__
extern const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_8_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 278e32db878d..8f76c6ecf50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -187,7 +187,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -362,7 +362,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
@@ -412,7 +412,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -441,7 +441,7 @@ static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -520,7 +520,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = {
static void mmhub_v2_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index fcf2813e70db..8bd0fc8d9d25 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -121,7 +121,7 @@ static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
@@ -280,7 +280,7 @@ static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
@@ -330,7 +330,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -373,7 +373,7 @@ static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev)
static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -446,7 +446,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = {
static void mmhub_v2_3_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
index 17a792616979..441379e91cfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -136,7 +136,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -319,7 +319,7 @@ static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
@@ -369,7 +369,7 @@ static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -398,7 +398,7 @@ static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -477,7 +477,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = {
static void mmhub_v3_0_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index 26509b6b8c24..12c7f4b46ea9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -138,7 +138,7 @@ static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -306,7 +306,7 @@ static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
@@ -356,7 +356,7 @@ static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -385,7 +385,7 @@ static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -459,7 +459,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
index 26abbc6a47ab..5dadc85abf7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -129,7 +129,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -311,7 +311,7 @@ static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
@@ -361,7 +361,7 @@ static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -390,7 +390,7 @@ static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev)
static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
@@ -469,7 +469,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = {
static void mmhub_v3_0_2_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 72083e96222f..e790f890aec6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -57,7 +57,7 @@ static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
uint32_t vmid, uint64_t value)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
@@ -294,7 +294,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned int num_level, block_size;
uint32_t tmp;
int i;
@@ -363,7 +363,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
int hubid)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
for (i = 0; i < 18; ++i) {
@@ -404,7 +404,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i, j;
@@ -507,8 +507,8 @@ static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool
static void mmhub_v9_4_init(struct amdgpu_device *adev)
{
- struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] =
- {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]};
+ struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = {
+ &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]};
int i;
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
index 3e4e858a6965..a773ef61b78c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
@@ -30,6 +30,8 @@
#define MMSCH_VERSION_MINOR 0
#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+#define MMSCH_V3_0_VCN_INSTANCES 0x2
+
enum mmsch_v3_0_command_type {
MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
@@ -47,7 +49,7 @@ struct mmsch_v3_0_table_info {
struct mmsch_v3_0_init_header {
uint32_t version;
uint32_t total_size;
- struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES];
+ struct mmsch_v3_0_table_info inst[MMSCH_V3_0_VCN_INSTANCES];
};
struct mmsch_v3_0_cmd_direct_reg_header {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
index 83653a50a1a2..796d4f8791e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -43,6 +43,8 @@
#define MMSCH_VF_MAILBOX_RESP__OK 0x1
#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_V4_0_VCN_INSTANCES 0x2
+
enum mmsch_v4_0_command_type {
MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
@@ -60,7 +62,7 @@ struct mmsch_v4_0_table_info {
struct mmsch_v4_0_init_header {
uint32_t version;
uint32_t total_size;
- struct mmsch_v4_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES];
+ struct mmsch_v4_0_table_info inst[MMSCH_V4_0_VCN_INSTANCES];
struct mmsch_v4_0_table_info jpegdec;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 24d12075ca3a..cd1a02d30420 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -30,6 +30,8 @@
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
+#define NPS_MODE_MASK 0x000000FFL
+
static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
@@ -66,6 +68,13 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
bool use_doorbell, int doorbell_index, int doorbell_size)
{
u32 doorbell_range = 0, doorbell_ctrl = 0;
+ int aid_id, dev_inst;
+
+ dev_inst = GET_INST(SDMA0, instance);
+ aid_id = adev->sdma.instance[instance].aid_id;
+
+ if (use_doorbell == false)
+ return;
doorbell_range =
REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
@@ -80,9 +89,10 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_RANGE_SIZE, doorbell_size);
- switch (instance) {
+ switch (dev_inst % adev->sdma.num_inst_per_aid) {
case 0:
- WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1, doorbell_range);
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1,
+ 4 * aid_id, doorbell_range);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -94,10 +104,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
0x1);
- WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL, doorbell_ctrl);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_1_CTRL,
+ aid_id, doorbell_ctrl);
break;
case 1:
- WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2, doorbell_range);
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2,
+ 4 * aid_id, doorbell_range);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -109,10 +121,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
0x2);
- WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL, doorbell_ctrl);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_2_CTRL,
+ aid_id, doorbell_ctrl);
break;
case 2:
- WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3, doorbell_range);
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3,
+ 4 * aid_id, doorbell_range);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -124,10 +138,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
0x8);
- WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL, doorbell_ctrl);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_5_CTRL,
+ aid_id, doorbell_ctrl);
break;
case 3:
- WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4, doorbell_range);
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4,
+ 4 * aid_id, doorbell_range);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -139,11 +155,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
0x9);
- WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_6_CTRL, doorbell_ctrl);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_6_CTRL,
+ aid_id, doorbell_ctrl);
break;
default:
break;
- };
+ }
return;
}
@@ -152,6 +169,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
int doorbell_index, int instance)
{
u32 doorbell_range = 0, doorbell_ctrl = 0;
+ u32 aid_id = instance;
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
@@ -161,7 +179,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_SIZE_ENTRY,
- 0x8);
+ 0x9);
+ if (aid_id)
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ DOORBELL0_FENCE_ENABLE_ENTRY,
+ 0x4);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -174,10 +197,15 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
- S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8);
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
+
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
} else {
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
@@ -185,10 +213,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
- }
- WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17, doorbell_range);
- WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL, doorbell_ctrl);
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ }
}
static void nbio_v7_9_enable_doorbell_aperture(struct amdgpu_device *adev,
@@ -235,7 +265,7 @@ static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev,
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_SIZE_ENTRY,
- 0x4);
+ 0x8);
ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -248,7 +278,7 @@ static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev,
S2A_DOORBELL_PORT1_RANGE_OFFSET, 0);
ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
- S2A_DOORBELL_PORT1_RANGE_SIZE, 0x4);
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8);
ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0);
@@ -319,6 +349,11 @@ static u32 nbio_v7_9_get_pcie_data_offset(struct amdgpu_device *adev)
return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
}
+static u32 nbio_v7_9_get_pcie_index_hi_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2_HI);
+}
+
const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg = {
.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
@@ -347,11 +382,57 @@ static void nbio_v7_9_enable_doorbell_interrupt(struct amdgpu_device *adev,
DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
}
+static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev)
+{
+ u32 tmp, px;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS);
+ px = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_STATUS,
+ PARTITION_MODE);
+
+ return px;
+}
+
+static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev,
+ u32 *supp_modes)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE);
+
+ if (supp_modes) {
+ *supp_modes =
+ RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP);
+ }
+
+ return ffs(tmp);
+}
+
+static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
+ 0xff & ~(adev->gfx.xcc_mask));
+
+ WREG32_SOC15(NBIO, 0, regBIFC_GFX_INT_MONITOR_MASK, 0x7ff);
+
+ inst_mask = adev->aid_mask & ~1U;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15_EXT(NBIO, i, regXCC_DOORBELL_FENCE, i,
+ XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
+
+ }
+}
+
const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_9_get_pcie_index_offset,
.get_pcie_data_offset = nbio_v7_9_get_pcie_data_offset,
+ .get_pcie_index_hi_offset = nbio_v7_9_get_pcie_index_hi_offset,
.get_rev_id = nbio_v7_9_get_rev_id,
.mc_access_enable = nbio_v7_9_mc_access_enable,
.get_memsize = nbio_v7_9_get_memsize,
@@ -366,4 +447,7 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_clockgating_state = nbio_v7_9_get_clockgating_state,
.ih_control = nbio_v7_9_ih_control,
.remap_hdp_registers = nbio_v7_9_remap_hdp_registers,
+ .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
+ .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
+ .init_registers = nbio_v7_9_init_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 0fb6013441f0..51523b27a186 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -341,11 +341,6 @@ void nv_grbm_select(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
-static void nv_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
-}
-
static bool nv_read_disabled_bios(struct amdgpu_device *adev)
{
/* todo */
@@ -381,12 +376,12 @@ static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -632,9 +627,9 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
if (enter)
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
else
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
if (adev->gfx.funcs->update_perfmon_mgcg)
adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
@@ -654,7 +649,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.read_register = &nv_read_register,
.reset = &nv_asic_reset,
.reset_method = &nv_asic_reset_method,
- .set_vga_state = &nv_vga_set_state,
.get_xclk = &nv_get_xclk,
.set_uvd_clocks = &nv_set_uvd_clocks,
.set_vce_clocks = &nv_set_vce_clocks,
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index fd6b58243b03..631dafb92299 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -462,6 +462,9 @@
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
#define PACKET3_RUN_LIST 0xA5
#define PACKET3_MAP_PROCESS_VM 0xA6
-
+/* GFX11 */
+#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
+# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
+# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 22c775f39119..18917df785ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -102,6 +102,7 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */
+ GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */
};
/* PSP boot config sub-commands */
@@ -338,6 +339,13 @@ struct psp_gfx_cmd_boot_cfg
uint32_t boot_config_valid; /* dynamic boot configuration valid bits bitmask */
};
+struct psp_gfx_cmd_sriov_spatial_part {
+ uint32_t mode;
+ uint32_t override_ips;
+ uint32_t override_xcds_avail;
+ uint32_t override_this_aid;
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -351,6 +359,7 @@ union psp_gfx_commands
struct psp_gfx_cmd_setup_tmr cmd_setup_vmr;
struct psp_gfx_cmd_load_toc cmd_load_toc;
struct psp_gfx_cmd_boot_cfg boot_cfg;
+ struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
};
struct psp_gfx_uresp_reserved
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index caee76ab7110..f9cb0d2c89d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -624,10 +624,11 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
- return 0;
-
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
- MBOX_READY_FLAG, MBOX_READY_MASK, false);
+ ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, false);
if (ret) {
dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
index b2414a729ca1..de5677ce4330 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
@@ -25,6 +25,8 @@
#include "amdgpu_psp.h"
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
void psp_v13_0_set_psp_funcs(struct psp_context *psp);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index fd2a7b66ac56..51afc92994a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -466,8 +466,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
sdma_v2_4_enable(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index e572389089d2..344202870aeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -734,8 +734,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
#endif
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
-
- ring->sched.ready = true;
}
/* unhalt the MEs */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 9295ac7edd56..cd37f45e01a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1114,8 +1114,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
/**
@@ -1202,8 +1200,6 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
static void
@@ -1825,12 +1821,12 @@ static int sdma_v4_0_sw_init(void *handle)
/*
* On Arcturus, SDMA instance 5~7 has a different vmhub
- * type(AMDGPU_MMHUB_1).
+ * type(AMDGPU_MMHUB1).
*/
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
- ring->vm_hub = AMDGPU_MMHUB_1;
+ ring->vm_hub = AMDGPU_MMHUB1(0);
else
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
@@ -1847,13 +1843,23 @@ static int sdma_v4_0_sw_init(void *handle)
/* paging queue use same doorbell index/routing as gfx queue
* with 0x400 (4096 dwords) offset on second doorbell page
*/
- ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
- ring->doorbell_index += 0x400;
+ if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
+ adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) {
+ ring->doorbell_index =
+ adev->doorbell_index.sdma_engine[i] << 1;
+ ring->doorbell_index += 0x400;
+ } else {
+ /* From vega20, the sdma_doorbell_range in 1st
+ * doorbell page is reserved for page queue.
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ }
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
- ring->vm_hub = AMDGPU_MMHUB_1;
+ ring->vm_hub = AMDGPU_MMHUB1(0);
else
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "page%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
@@ -2306,7 +2312,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
.secure_submission_supported = true,
@@ -2338,7 +2344,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
.secure_submission_supported = true,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index 6f9895cdddb1..0ddb6955a6d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -141,6 +141,10 @@ static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
0, 0,
},
+ { "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED),
+ 0, 0,
+ },
{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
0, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 64dcaa2670dd..ea5e12390d18 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -27,6 +27,7 @@
#include <linux/pci.h>
#include "amdgpu.h"
+#include "amdgpu_xcp.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
@@ -53,11 +54,14 @@ static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
{
- return (adev->reg_offset[SDMA0_HWIP][instance][0] + offset);
+ u32 dev_inst = GET_INST(SDMA0, instance);
+
+ return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
}
static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
@@ -92,13 +96,25 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
}
}
-static void sdma_v4_4_2_init_golden_registers(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
- case IP_VERSION(4, 4, 2):
- break;
- default:
- break;
+ u32 val;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val);
+
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS,
+ 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val);
}
}
@@ -399,19 +415,21 @@ static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
/**
- * sdma_v4_4_2_gfx_stop - stop the gfx async dma engines
+ * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
*
* Stop the gfx async dma ring buffers.
*/
-static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i, unset = 0;
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].ring;
if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
@@ -429,32 +447,36 @@ static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev)
}
/**
- * sdma_v4_4_2_rlc_stop - stop the compute async dma engines
+ * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines
*
* @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
*
* Stop the compute async dma queues.
*/
-static void sdma_v4_4_2_rlc_stop(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
/* XXX todo */
}
/**
- * sdma_v4_4_2_page_stop - stop the page async dma engines
+ * sdma_v4_4_2_inst_page_stop - stop the page async dma engines
*
* @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
*
* Stop the page async dma ring buffers.
*/
-static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
bool unset = false;
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].page;
if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
@@ -475,14 +497,16 @@ static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev)
}
/**
- * sdma_v4_4_2_ctx_switch_enable - stop the async dma engines context switch
+ * sdma_v4_4_2_inst_ctx_switch_enable - stop the async dma engines context switch
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs context switch.
+ * @inst_mask: mask of dma engine instances to be enabled
*
* Halt or unhalt the async dma engines context switch.
*/
-static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
+ bool enable, uint32_t inst_mask)
{
u32 f32_cntl, phase_quantum = 0;
int i;
@@ -511,7 +535,7 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl
unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
AUTO_CTXSW_ENABLE, enable ? 1 : 0);
@@ -525,30 +549,36 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl
/* Extend page fault timeout to avoid interrupt storm */
WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
}
-
}
/**
- * sdma_v4_4_2_enable - stop the async dma engines
+ * sdma_v4_4_2_inst_enable - stop the async dma engines
*
* @adev: amdgpu_device pointer
* @enable: enable/disable the DMA MEs.
+ * @inst_mask: mask of dma engine instances to be enabled
*
* Halt or unhalt the async dma engines.
*/
-static void sdma_v4_4_2_enable(struct amdgpu_device *adev, bool enable)
+static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
+ uint32_t inst_mask)
{
u32 f32_cntl;
int i;
if (!enable) {
- sdma_v4_4_2_gfx_stop(adev);
- sdma_v4_4_2_rlc_stop(adev);
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
if (adev->sdma.has_page_queue)
- sdma_v4_4_2_page_stop(adev);
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ /* SDMA FW needs to respond to FREEZE requests during reset.
+ * Keep it running during reset */
+ if (!amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ return;
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
@@ -659,8 +689,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
/**
@@ -750,8 +778,6 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
#endif
/* enable DMA IBs */
WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
-
- ring->sched.ready = true;
}
static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
@@ -760,14 +786,16 @@ static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
}
/**
- * sdma_v4_4_2_rlc_resume - setup and start the async dma engines
+ * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
*
* Set up the compute DMA queues and enable them.
* Returns 0 for success, error for failure.
*/
-static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev)
+static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
sdma_v4_4_2_init_pg(adev);
@@ -775,14 +803,16 @@ static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev)
}
/**
- * sdma_v4_4_2_load_microcode - load the sDMA ME ucode
+ * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode
*
* @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
*
* Loads the sDMA0/1 ucode.
* Returns 0 for success, -EINVAL if the ucode is not available.
*/
-static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev)
+static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
const struct sdma_firmware_header_v1_0 *hdr;
const __le32 *fw_data;
@@ -790,9 +820,9 @@ static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev)
int i, j;
/* halt the MEs */
- sdma_v4_4_2_enable(adev, false);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
if (!adev->sdma.instance[i].fw)
return -EINVAL;
@@ -818,38 +848,42 @@ static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev)
}
/**
- * sdma_v4_4_2_start - setup and start the async dma engines
+ * sdma_v4_4_2_inst_start - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
*
* Set up the DMA engines and enable them.
* Returns 0 for success, error for failure.
*/
-static int sdma_v4_4_2_start(struct amdgpu_device *adev)
+static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
+ uint32_t inst_mask)
{
struct amdgpu_ring *ring;
+ uint32_t tmp_mask;
int i, r = 0;
if (amdgpu_sriov_vf(adev)) {
- sdma_v4_4_2_ctx_switch_enable(adev, false);
- sdma_v4_4_2_enable(adev, false);
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
} else {
/* bypass sdma microcode loading on Gopher */
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
- !(adev->pdev->device == 0x49) && !(adev->pdev->device == 0x50)) {
- r = sdma_v4_4_2_load_microcode(adev);
+ adev->sdma.instance[0].fw) {
+ r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
if (r)
return r;
}
/* unhalt the MEs */
- sdma_v4_4_2_enable(adev, true);
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
/* enable sdma ring preemption */
- sdma_v4_4_2_ctx_switch_enable(adev, true);
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
}
/* start the gfx rings and rlc compute queues */
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
uint32_t temp;
WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
@@ -860,6 +894,8 @@ static int sdma_v4_4_2_start(struct amdgpu_device *adev)
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, regSDMA_CNTL);
temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
WREG32_SDMA(i, regSDMA_CNTL, temp);
if (!amdgpu_sriov_vf(adev)) {
@@ -876,15 +912,16 @@ static int sdma_v4_4_2_start(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev)) {
- sdma_v4_4_2_ctx_switch_enable(adev, true);
- sdma_v4_4_2_enable(adev, true);
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
} else {
- r = sdma_v4_4_2_rlc_resume(adev);
+ r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
if (r)
return r;
}
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_helper(ring);
@@ -1221,6 +1258,7 @@ static int sdma_v4_4_2_early_init(void *handle)
sdma_v4_4_2_set_buffer_funcs(adev);
sdma_v4_4_2_set_vm_pte_funcs(adev);
sdma_v4_4_2_set_irq_funcs(adev);
+ sdma_v4_4_2_set_ras_funcs(adev);
return 0;
}
@@ -1253,9 +1291,10 @@ static int sdma_v4_4_2_sw_init(void *handle)
struct amdgpu_ring *ring;
int r, i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 aid_id;
/* SDMA trap event */
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
SDMA0_4_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
@@ -1264,7 +1303,7 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
/* SDMA SRAM ECC event */
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
&adev->sdma.ecc_irq);
@@ -1273,7 +1312,7 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
SDMA0_4_0__SRCID__SDMA_VM_HOLE,
&adev->sdma.vm_hole_irq);
@@ -1303,15 +1342,17 @@ static int sdma_v4_4_2_sw_init(void *handle)
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
+ aid_id = adev->sdma.instance[i].aid_id;
DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
/* doorbell size is 2 dwords, get DWORD offset */
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
- sprintf(ring->name, "sdma%d", i);
+ sprintf(ring->name, "sdma%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -1323,14 +1364,15 @@ static int sdma_v4_4_2_sw_init(void *handle)
ring->ring_obj = NULL;
ring->use_doorbell = true;
- /* paging queue use same doorbell index/routing as gfx queue
- * with 0x400 (4096 dwords) offset on second doorbell page
+ /* doorbell index of page queue is assigned right after
+ * gfx queue on the same instance
*/
- ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
- ring->doorbell_index += 0x400;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
- sprintf(ring->name, "page%d", i);
+ sprintf(ring->name, "page%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
@@ -1340,6 +1382,11 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
}
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "fail to initialize sdma ras block\n");
+ return -EINVAL;
+ }
+
return r;
}
@@ -1366,14 +1413,13 @@ static int sdma_v4_4_2_hw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t inst_mask;
- if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
-
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!amdgpu_sriov_vf(adev))
- sdma_v4_4_2_init_golden_registers(adev);
+ sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_start(adev);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask);
return r;
}
@@ -1381,26 +1427,36 @@ static int sdma_v4_4_2_hw_init(void *handle)
static int sdma_v4_4_2_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t inst_mask;
int i;
if (amdgpu_sriov_vf(adev))
return 0;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
}
- sdma_v4_4_2_ctx_switch_enable(adev, false);
- sdma_v4_4_2_enable(adev, false);
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
return 0;
}
+static int sdma_v4_4_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+
static int sdma_v4_4_2_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_in_reset(adev))
+ sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+
return sdma_v4_4_2_hw_fini(adev);
}
@@ -1471,13 +1527,31 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t instance;
+ uint32_t instance, i;
DRM_DEBUG("IH: SDMA trap\n");
instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+
+ /* Client id gives the SDMA instance in AID. To know the exact SDMA
+ * instance, interrupt entry gives the node id which corresponds to the AID instance.
+ * Match node id with the AID id associated with the SDMA instance. */
+ for (i = instance; i < adev->sdma.num_instances;
+ i += adev->sdma.num_inst_per_aid) {
+ if (adev->sdma.instance[i].aid_id ==
+ node_id_to_phys_map[entry->node_id])
+ break;
+ }
+
+ if (i >= adev->sdma.num_instances) {
+ dev_WARN_ONCE(
+ adev->dev, 1,
+ "Couldn't find the right sdma instance in trap handler");
+ return 0;
+ }
+
switch (entry->ring_id) {
case 0:
- amdgpu_fence_process(&adev->sdma.instance[instance].ring);
+ amdgpu_fence_process(&adev->sdma.instance[i].ring);
break;
default:
break;
@@ -1496,7 +1570,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
* be disabled and the driver should only look for the aggregated
* interrupt via sync flood
*/
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
goto out;
instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
@@ -1535,15 +1609,22 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
{
- u32 sdma_edc_config;
+ u32 sdma_cntl;
- sdma_edc_config = RREG32_SDMA(type, regCC_SDMA_EDC_CONFIG);
- /*
- * FIXME: This was inherited from Aldebaran, but no this field
- * definition in the regspec of both Aldebaran and SDMA 4.4.2
- */
- sdma_edc_config |= (state == AMDGPU_IRQ_STATE_ENABLE) ? (1 << 2) : 0;
- WREG32_SDMA(type, regCC_SDMA_EDC_CONFIG, sdma_edc_config);
+ sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
+ DRAM_ECC_INT_ENABLE, 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
+ break;
+ /* sdma ecc interrupt is enabled by default
+ * driver doesn't need to do anything to
+ * enable the interrupt */
+ case AMDGPU_IRQ_STATE_ENABLE:
+ default:
+ break;
+ }
return 0;
}
@@ -1615,19 +1696,49 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
return 0;
}
-static void sdma_v4_4_2_update_medium_grain_clock_gating(
- struct amdgpu_device *adev,
- bool enable)
+static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
{
uint32_t data, def;
int i;
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+ data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+ data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ }
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
- data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
@@ -1637,11 +1748,9 @@ static void sdma_v4_4_2_update_medium_grain_clock_gating(
WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
}
} else {
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
- data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK |
- SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK |
- SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
@@ -1653,45 +1762,21 @@ static void sdma_v4_4_2_update_medium_grain_clock_gating(
}
}
-
-static void sdma_v4_4_2_update_medium_grain_light_sleep(
- struct amdgpu_device *adev,
- bool enable)
-{
- uint32_t data, def;
- int i;
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- /* 1-not override: enable sdma mem light sleep */
- def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL);
- data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32_SDMA(0, regSDMA_POWER_CNTL, data);
- }
- } else {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- /* 0-override:disable sdma mem light sleep */
- def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL);
- data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32_SDMA(0, regSDMA_POWER_CNTL, data);
- }
- }
-}
-
static int sdma_v4_4_2_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t inst_mask;
if (amdgpu_sriov_vf(adev))
return 0;
- sdma_v4_4_2_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE);
- sdma_v4_4_2_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE);
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+
+ sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
return 0;
}
@@ -1710,12 +1795,12 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
*flags = 0;
/* AMD_CG_SUPPORT_SDMA_MGCG */
- data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, regSDMA_CLK_CTRL));
- if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK))
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL));
+ if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK))
*flags |= AMD_CG_SUPPORT_SDMA_MGCG;
/* AMD_CG_SUPPORT_SDMA_LS */
- data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, regSDMA_POWER_CNTL));
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL));
if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
@@ -1740,7 +1825,7 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
.get_rptr = sdma_v4_4_2_ring_get_rptr,
@@ -1771,7 +1856,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
- .align_mask = 0xf,
+ .align_mask = 0xff,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
.get_rptr = sdma_v4_4_2_ring_get_rptr,
@@ -1802,7 +1887,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
{
- int i;
+ int i, dev_inst;
for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs;
@@ -1812,6 +1897,11 @@ static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
&sdma_v4_4_2_page_ring_funcs;
adev->sdma.instance[i].page.me = i;
}
+
+ dev_inst = GET_INST(SDMA0, i);
+ /* AID to which SDMA belongs depends on physical instance */
+ adev->sdma.instance[i].aid_id =
+ dev_inst / adev->sdma.num_inst_per_aid;
}
}
@@ -1965,3 +2055,146 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
.rev = 0,
.funcs = &sdma_v4_4_2_ip_funcs,
};
+
+static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask);
+
+ return r;
+}
+
+static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask = inst_mask;
+ int i;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, tmp_mask) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = {
+ .suspend = &sdma_v4_4_2_xcp_suspend,
+ .resume = &sdma_v4_4_2_xcp_resume
+};
+
+static const struct amdgpu_ras_err_status_reg_entry sdma_v4_2_2_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(SDMA0, 0, regSDMA_UE_ERR_STATUS_LO, regSDMA_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SDMA"},
+};
+
+static const struct amdgpu_ras_memory_id_entry sdma_v4_4_2_ras_memory_list[] = {
+ {AMDGPU_SDMA_MBANK_DATA_BUF0, "SDMA_MBANK_DATA_BUF0"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF1, "SDMA_MBANK_DATA_BUF1"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF2, "SDMA_MBANK_DATA_BUF2"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF3, "SDMA_MBANK_DATA_BUF3"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF4, "SDMA_MBANK_DATA_BUF4"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF5, "SDMA_MBANK_DATA_BUF5"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF6, "SDMA_MBANK_DATA_BUF6"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF7, "SDMA_MBANK_DATA_BUF7"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF8, "SDMA_MBANK_DATA_BUF8"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF9, "SDMA_MBANK_DATA_BUF9"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF10, "SDMA_MBANK_DATA_BUF10"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF11, "SDMA_MBANK_DATA_BUF11"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF12, "SDMA_MBANK_DATA_BUF12"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF13, "SDMA_MBANK_DATA_BUF13"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF14, "SDMA_MBANK_DATA_BUF14"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF15, "SDMA_MBANK_DATA_BUF15"},
+ {AMDGPU_SDMA_UCODE_BUF, "SDMA_UCODE_BUF"},
+ {AMDGPU_SDMA_RB_CMD_BUF, "SDMA_RB_CMD_BUF"},
+ {AMDGPU_SDMA_IB_CMD_BUF, "SDMA_IB_CMD_BUF"},
+ {AMDGPU_SDMA_UTCL1_RD_FIFO, "SDMA_UTCL1_RD_FIFO"},
+ {AMDGPU_SDMA_UTCL1_RDBST_FIFO, "SDMA_UTCL1_RDBST_FIFO"},
+ {AMDGPU_SDMA_UTCL1_WR_FIFO, "SDMA_UTCL1_WR_FIFO"},
+ {AMDGPU_SDMA_DATA_LUT_FIFO, "SDMA_DATA_LUT_FIFO"},
+ {AMDGPU_SDMA_SPLIT_DAT_BUF, "SDMA_SPLIT_DAT_BUF"},
+};
+
+static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t sdma_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+
+ /* sdma v4_4_2 doesn't support query ce counts */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_v4_4_2_ras_memory_list,
+ ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
+ sdma_dev_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ int i = 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, inst_mask)
+ sdma_v4_4_2_inst_query_ras_error_count(adev, i, ras_err_status);
+ } else {
+ dev_warn(adev->dev, "SDMA RAS is not supported\n");
+ }
+}
+
+static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t sdma_inst)
+{
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_dev_inst);
+}
+
+static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ int i = 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, inst_mask)
+ sdma_v4_4_2_inst_reset_ras_error_count(adev, i);
+ } else {
+ dev_warn(adev->dev, "SDMA RAS is not supported\n");
+ }
+}
+
+static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
+ .query_ras_error_count = sdma_v4_4_2_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
+};
+
+static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_2_ras_hw_ops,
+ },
+};
+
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.ras = &sdma_v4_4_2_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
index 4814e8a074d6..d516145529bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
@@ -27,4 +27,6 @@
extern const struct amd_ip_funcs sdma_v4_4_2_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block;
+extern struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs;
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 92e1299be021..5c4d4df9cf94 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -819,8 +819,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
-
if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
sdma_v5_0_ctx_switch_enable(adev, true);
sdma_v5_0_enable(adev, true);
@@ -1389,7 +1387,7 @@ static int sdma_v5_0_sw_init(void *handle)
(adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset
: (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
(i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index ca7e8757d78e..a7b230e5a26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -617,18 +617,14 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
-
if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
sdma_v5_2_ctx_switch_enable(adev, true);
sdma_v5_2_enable(adev, true);
}
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
return r;
- }
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -1253,7 +1249,7 @@ static int sdma_v5_2_sw_init(void *handle)
ring->doorbell_index =
(adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 3d9a80511a45..3b03dda854fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -238,6 +238,8 @@ static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*
* @ring: amdgpu ring pointer
* @ib: IB object to schedule
+ * @flags: unused
+ * @job: job to retrieve vmid from
*
* Schedule an IB in the DMA ring.
*/
@@ -585,16 +587,12 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
- ring->sched.ready = true;
-
if (amdgpu_sriov_vf(adev))
sdma_v6_0_enable(adev, true);
r = amdgpu_ring_test_helper(ring);
- if (r) {
- ring->sched.ready = false;
+ if (r)
return r;
- }
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -942,6 +940,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
* sdma_v6_0_ring_test_ib - test an IB on the DMA engine
*
* @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
*
* Test a simple IB in the DMA ring.
* Returns 0 on success, error on failure.
@@ -1122,6 +1121,7 @@ static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
/**
* sdma_v6_0_ring_pad_ib - pad the IB
* @ib: indirect buffer to fill with padding
+ * @ring: amdgpu ring pointer
*
* Pad the IB with NOPs to a boundary multiple of 8.
*/
@@ -1171,6 +1171,8 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
* sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
*
* @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
*
* Update the page table base and flush the VM TLB
* using sDMA.
@@ -1298,7 +1300,7 @@ static int sdma_v6_0_sw_init(void *handle)
ring->doorbell_index =
(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
- ring->vm_hub = AMDGPU_GFXHUB_0;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 7f99e130acd0..f64b87b11b1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1181,12 +1181,12 @@ static uint32_t si_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index abca8b529721..42c4547f32ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -174,8 +174,6 @@ static int si_dma_start(struct amdgpu_device *adev)
WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
- ring->sched.ready = true;
-
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
new file mode 100644
index 000000000000..4368a5891eeb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_3.h"
+#include "soc15_common.h"
+#include "smuio/smuio_13_0_3_offset.h"
+#include "smuio/smuio_13_0_3_sh_mask.h"
+
+#define PKG_TYPE_MASK 0x00000003L
+
+/**
+ * smuio_v13_0_3_get_die_id - query die id from FCH.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns die id
+ */
+static u32 smuio_v13_0_3_get_die_id(struct amdgpu_device *adev)
+{
+ u32 data, die_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID);
+
+ return die_id;
+}
+
+/**
+ * smuio_v13_0_3_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_3_get_socket_id(struct amdgpu_device *adev)
+{
+ u32 data, socket_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+ return socket_id;
+}
+
+/**
+ * smuio_v13_0_3_get_pkg_type - query package type set by MP1/bootcode
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns package type
+ */
+
+static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *adev)
+{
+ enum amdgpu_pkg_type pkg_type;
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, PKG_TYPE);
+ /* pkg_type[4:0]
+ *
+ * bit 1 == 1 APU form factor
+ *
+ * b0100 - b1111 - Reserved
+ */
+ switch (data & PKG_TYPE_MASK) {
+ case 0x2:
+ pkg_type = AMDGPU_PKG_TYPE_APU;
+ break;
+ default:
+ pkg_type = AMDGPU_PKG_TYPE_UNKNOWN;
+ break;
+ }
+
+ return pkg_type;
+}
+
+
+const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs = {
+ .get_die_id = smuio_v13_0_3_get_die_id,
+ .get_socket_id = smuio_v13_0_3_get_socket_id,
+ .get_pkg_type = smuio_v13_0_3_get_pkg_type,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
new file mode 100644
index 000000000000..795f66c5a58b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_3_H__
+#define __SMUIO_V13_0_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs;
+
+#endif /* __SMUIO_V13_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index a2fd1ffadb59..afcaeadda4c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -153,6 +153,24 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode =
.codec_array = rn_video_codecs_decode_array,
};
+static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_3_video_codecs_decode_array),
+ .codec_array = vcn_4_0_3_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@@ -185,6 +203,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &rn_video_codecs_decode;
return 0;
+ case IP_VERSION(4, 0, 3):
+ if (encode)
+ *codecs = &vcn_4_0_3_video_codecs_encode;
+ else
+ *codecs = &vcn_4_0_3_video_codecs_decode;
+ return 0;
default:
return -EINVAL;
}
@@ -312,7 +336,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
void soc15_grbm_select(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 queue, u32 vmid)
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id)
{
u32 grbm_gfx_cntl = 0;
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
@@ -320,12 +344,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
- WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
-}
-
-static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
+ WREG32_SOC15_RLC_SHADOW(GC, xcc_id, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static bool soc15_read_disabled_bios(struct amdgpu_device *adev)
@@ -364,12 +383,12 @@ static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_n
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -533,6 +552,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
if (connected_to_cpu)
return AMD_RESET_METHOD_MODE2;
break;
+ case IP_VERSION(13, 0, 6):
+ /* Use gpu_recovery param to target a reset method.
+ * Enable triggering of GPU reset only if specified
+ * by module parameter.
+ */
+ if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
+ return AMD_RESET_METHOD_MODE2;
+ else
+ return AMD_RESET_METHOD_NONE;
default:
break;
}
@@ -817,7 +845,6 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.reset_method = &soc15_asic_reset_method,
- .set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
@@ -839,7 +866,6 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.reset_method = &soc15_asic_reset_method,
- .set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
@@ -854,6 +880,28 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.query_video_codecs = &soc15_query_video_codecs,
};
+static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
+{
+ .read_disabled_bios = &soc15_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc15_read_register,
+ .reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
+ .get_xclk = &soc15_get_xclk,
+ .set_uvd_clocks = &soc15_set_uvd_clocks,
+ .set_vce_clocks = &soc15_set_vce_clocks,
+ .get_config_memsize = &soc15_get_config_memsize,
+ .need_full_reset = &soc15_need_full_reset,
+ .init_doorbell_index = &aqua_vanjaram_doorbell_index_init,
+ .get_pcie_usage = &vega20_get_pcie_usage,
+ .need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
+ .pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
+ .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+};
+
static int soc15_common_early_init(void *handle)
{
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
@@ -867,6 +915,8 @@ static int soc15_common_early_init(void *handle)
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
@@ -1095,9 +1145,18 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x3c;
break;
case IP_VERSION(9, 4, 3):
- adev->asic_funcs = &vega20_asic_funcs;
- adev->cg_flags = 0;
- adev->pg_flags = 0;
+ adev->asic_funcs = &aqua_vanjaram_asic_funcs;
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_IH_CG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x46;
break;
default:
/* FIXME: not supported yet */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index efc2a253e8db..eac54042c6c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -100,7 +100,7 @@ struct soc15_ras_field_entry {
#define SOC15_RAS_REG_FIELD_VAL(val, entry, field) SOC15_REG_FIELD_VAL((val), (entry).field##_count_mask, (entry).field##_count_shift)
void soc15_grbm_select(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 queue, u32 vmid);
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id);
void soc15_set_virt_ops(struct amdgpu_device *adev);
void soc15_program_register_sequence(struct amdgpu_device *adev,
@@ -111,7 +111,11 @@ int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
int arct_reg_base_init(struct amdgpu_device *adev);
int aldebaran_reg_base_init(struct amdgpu_device *adev);
+void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev);
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
void vega10_doorbell_index_init(struct amdgpu_device *adev);
void vega20_doorbell_index_init(struct amdgpu_device *adev);
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 9fefd403e14f..96948a59f8dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -24,8 +24,18 @@
#ifndef __SOC15_COMMON_H__
#define __SOC15_COMMON_H__
+/* GET_INST returns the physical instance corresponding to a logical instance */
+#define GET_INST(ip, inst) \
+ (adev->ip_map.logical_to_dev_inst ? \
+ adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst) : inst)
+#define GET_MASK(ip, mask) \
+ (adev->ip_map.logical_to_dev_mask ? \
+ adev->ip_map.logical_to_dev_mask(adev, ip##_HWIP, mask) : mask)
+
/* Register Access Macros */
#define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+#define SOC15_REG_OFFSET1(ip, inst, reg, offset) \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset))
#define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \
((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
@@ -66,7 +76,8 @@
AMDGPU_REGS_NO_KIQ, ip##_HWIP)
#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
- __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, 0, ip##_HWIP)
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \
+ (offset), 0, ip##_HWIP)
#define WREG32_SOC15(ip, inst, reg, value) \
__WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \
@@ -86,31 +97,15 @@
__WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \
value, 0, ip##_HWIP)
-#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
-({ int ret = 0; \
- do { \
- uint32_t old_ = 0; \
- uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
- uint32_t loop = adev->usec_timeout; \
- ret = 0; \
- while ((tmp_ & (mask)) != (expected_value)) { \
- if (old_ != tmp_) { \
- loop = adev->usec_timeout; \
- old_ = tmp_; \
- } else \
- udelay(1); \
- tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
- loop--; \
- if (!loop) { \
- DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \
- inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \
- ret = -ETIMEDOUT; \
- break; \
- } \
- } \
- } while (0); \
- ret; \
-})
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)), \
+ #reg, expected_value, mask)
+
+#define SOC15_WAIT_ON_RREG_OFFSET(ip, inst, reg, offset, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg) + (offset)), \
+ #reg, expected_value, mask)
#define WREG32_RLC(reg, value) \
__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP)
@@ -157,10 +152,10 @@
do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
if (amdgpu_sriov_fullaccess(adev)) { \
- uint32_t r2 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \
- uint32_t r3 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \
- uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \
- uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \
+ uint32_t r2 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \
+ uint32_t r3 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \
+ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \
+ uint32_t grbm_idx = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \
if (target_reg == grbm_cntl) \
WREG32(r2, value); \
else if (target_reg == grbm_idx) \
@@ -176,13 +171,13 @@
#define WREG32_SOC15_RLC(ip, inst, reg, value) \
do { \
- uint32_t target_reg = adev->reg_offset[ip##_HWIP][0][reg##_BASE_IDX] + reg;\
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
__WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP); \
} while (0)
#define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
do { \
- uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
+ uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\
WREG32_RLC_EX(prefix, target_reg, value); \
} while (0)
@@ -199,4 +194,14 @@
#define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
__RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP)
+/* inst equals to ext for some IPs */
+#define RREG32_SOC15_EXT(ip, inst, reg, ext) \
+ RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext)) \
+
+#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \
+ WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext), \
+ value) \
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index d77162536514..e5e5d68a4d70 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -248,11 +248,6 @@ void soc21_grbm_select(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl);
}
-static void soc21_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
-}
-
static bool soc21_read_disabled_bios(struct amdgpu_device *adev)
{
/* todo */
@@ -288,12 +283,12 @@ static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_n
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
}
@@ -542,9 +537,9 @@ static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
if (enter)
- amdgpu_gfx_rlc_enter_safe_mode(adev);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
else
- amdgpu_gfx_rlc_exit_safe_mode(adev);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
if (adev->gfx.funcs->update_perfmon_mgcg)
adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
@@ -559,7 +554,6 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs =
.read_register = &soc21_read_register,
.reset = &soc21_asic_reset,
.reset_method = &soc21_asic_reset_method,
- .set_vga_state = &soc21_vga_set_state,
.get_xclk = &soc21_get_xclk,
.set_uvd_clocks = &soc21_set_uvd_clocks,
.set_vce_clocks = &soc21_set_vce_clocks,
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 30d0482ac466..879bb7af297c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -129,6 +129,8 @@ struct ta_ras_trigger_error_input {
struct ta_ras_init_flags {
uint8_t poison_mode_en;
uint8_t dgpu_mode;
+ uint16_t xcc_mask;
+ uint8_t channel_dis_num;
};
struct ta_ras_output_flags {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index d51ae0bc36f7..46bfdee79bfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -444,6 +444,11 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade
umc_v8_10_ecc_info_query_error_address, ras_error_status);
}
+static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr)
+{
+ hdr->version = RAS_TABLE_VER_V2_1;
+}
+
const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
.query_ras_error_count = umc_v8_10_query_ras_error_count,
.query_ras_error_address = umc_v8_10_query_ras_error_address,
@@ -457,4 +462,5 @@ struct amdgpu_umc_ras umc_v8_10_ras = {
.query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
+ .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
index c6dfd433fec7..dc12e0af5451 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -33,7 +33,8 @@
/* Total channel instances for all available umc nodes */
#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
- (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->gmc.num_umc)
+ (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * \
+ (adev)->gmc.num_umc - hweight32((adev)->gmc.m_half_use) * 2)
/* UMC regiser per channel offset */
#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index e32b656b3dab..abaa4463e906 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -444,7 +444,7 @@ static int uvd_v7_0_sw_init(void *handle)
continue;
if (!amdgpu_sriov_vf(adev)) {
ring = &adev->uvd.inst[j].ring;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "uvd_%d", ring->me);
r = amdgpu_ring_init(adev, ring, 512,
&adev->uvd.inst[j].irq, 0,
@@ -455,7 +455,7 @@ static int uvd_v7_0_sw_init(void *handle)
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst[j].ring_enc[i];
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i);
if (amdgpu_sriov_vf(adev)) {
ring->use_doorbell = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 57b85bb6a1e4..e0b70cd3b697 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -466,7 +466,7 @@ static int vce_v4_0_sw_init(void *handle)
enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
ring = &adev->vce.ring[i];
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vce%d", i);
if (amdgpu_sriov_vf(adev)) {
/* DOORBELL only works under SRIOV */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 761c28fa6ec1..16feb491adf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -120,7 +120,7 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
ring = &adev->vcn.inst->ring_dec;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -142,7 +142,7 @@ static int vcn_v1_0_sw_init(void *handle)
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_enc%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
hw_prio, NULL);
@@ -211,7 +211,7 @@ static int vcn_v1_0_hw_init(void *handle)
goto done;
}
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
@@ -1304,7 +1304,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
/* Restore */
- ring = &adev->jpeg.inst->ring_dec;
+ ring = adev->jpeg.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
@@ -1802,7 +1802,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
@@ -1810,7 +1810,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
adev->vcn.pause_dpg_mode(adev, 0, &new_state);
}
- fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
+ fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec);
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
if (fences == 0) {
@@ -1832,7 +1832,7 @@ static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
- if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec))
DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
@@ -1864,7 +1864,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 7c2b3aa48083..c975aed2f6c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -129,7 +129,7 @@ static int vcn_v2_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
@@ -160,7 +160,7 @@ static int vcn_v2_0_sw_init(void *handle)
ring = &adev->vcn.inst->ring_enc[i];
ring->use_doorbell = true;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
if (!amdgpu_sriov_vf(adev))
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 515681c88dcb..bb1875f926f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -188,9 +188,9 @@ static int vcn_v2_5_sw_init(void *handle)
(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
- ring->vm_hub = AMDGPU_MMHUB_1;
+ ring->vm_hub = AMDGPU_MMHUB1(0);
else
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec_%d", j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
@@ -208,9 +208,9 @@ static int vcn_v2_5_sw_init(void *handle)
(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
- ring->vm_hub = AMDGPU_MMHUB_1;
+ ring->vm_hub = AMDGPU_MMHUB1(0);
else
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_enc_%d.%d", j, i);
r = amdgpu_ring_init(adev, ring, 512,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 3eab186261aa..c8f63b3c6f69 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -189,7 +189,7 @@ static int vcn_v3_0_sw_init(void *handle)
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
}
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
AMDGPU_RING_PRIO_DEFAULT,
@@ -213,7 +213,7 @@ static int vcn_v3_0_sw_init(void *handle)
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
}
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_enc_%d.%d", i, j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
hw_prio, &adev->vcn.inst[i].sched_score);
@@ -1313,7 +1313,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
- for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
+ for (i = 0; i < MMSCH_V3_0_VCN_INSTANCES; i++) {
header.inst[i].init_status = 0;
header.inst[i].table_offset = 0;
header.inst[i].table_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index da126ff8bcbc..b48bb5212488 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -153,7 +153,7 @@ static int vcn_v4_0_sw_init(void *handle)
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
- ring->vm_hub = AMDGPU_MMHUB_0;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_unified_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
@@ -1243,7 +1243,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
- for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
+ for (i = 0; i < MMSCH_V4_0_VCN_INSTANCES; i++) {
header.inst[i].init_status = 0;
header.inst[i].table_offset = 0;
header.inst[i].table_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
new file mode 100644
index 000000000000..5d67b8b8a3d6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -0,0 +1,1541 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+
+static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect);
+/**
+ * vcn_v4_0_3_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v4_0_3_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* re-use enc ring as unified ring */
+ adev->vcn.num_enc_rings = 1;
+
+ vcn_v4_0_3_set_unified_ring_funcs(adev);
+ vcn_v4_0_3_set_irq_funcs(adev);
+ vcn_v4_0_3_set_ras_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v4_0_3_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v4_0_3_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+ sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT,
+ &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = true;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ r = amdgpu_vcn_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v4_0_3_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r, idx;
+
+ if (drm_dev_enter(&adev->ddev, &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = cpu_to_le32(false);
+ }
+ drm_dev_exit(idx);
+ }
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v4_0_3_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell) {
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst,
+ adev->vcn.inst[i].aid_id);
+
+ WREG32_SOC15(
+ VCN, GET_INST(VCN, ring->me),
+ regVCN_RB1_DB_CTRL,
+ ring->doorbell_index
+ << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(
+ VCN, GET_INST(VCN, ring->me),
+ regVCN_RB1_DB_CTRL);
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+
+done:
+ if (!r)
+ DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v4_0_3_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
+ vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v4_0_3_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = vcn_v4_0_3_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v4_0_3_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v4_0_3_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
+ .tmr_mc_addr_lo));
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
+ .tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
+ AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know it's offsets with dpg mode
+ */
+static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t data;
+ int vcn_inst;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* VCN disable CGC */
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @sram_sel: sram select
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Disable clock gating for VCN block with dpg mode
+ */
+static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
+ int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable sw clock gating control */
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t data;
+ int vcn_inst;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ int vcn_inst;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
+ inst_idx, adev->vcn.inst[inst_idx].aid_id);
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable clock gating */
+ vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ psp_update_vcn_sram(adev, 0, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
+
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /*resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_start - VCN start
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Start VCN block
+ */
+static int vcn_v4_0_3_start(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ int i, j, k, r, vcn_inst;
+ uint32_t tmp;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ vcn_inst = GET_INST(VCN, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
+ UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v4_0_3_disable_clock_gating(adev, i);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
+ tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_3_mc_resume(adev, i);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst,
+ regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_DEV_ERROR(adev->dev,
+ "VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
+
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ fw_shared->sq.queue_mode &=
+ cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
+
+ }
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop VCN block with dpg mode
+ */
+static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t tmp;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_stop - VCN stop
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop VCN block
+ */
+static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ int i, r = 0, vcn_inst;
+ uint32_t tmp;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_3_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
+ UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto Done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* Unblock VCPU Register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* reset LMI UMC/LMI/VCPU */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear VCN status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_3_enable_clock_gating(adev, i);
+ }
+Done:
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ */
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
+ struct dpg_pause_state *new_state)
+{
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
+ regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ vcn_inst = GET_INST(VCN, i);
+ adev->vcn.inst[i].aid_id =
+ vcn_inst / adev->vcn.num_inst_per_aid;
+ }
+ DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n");
+}
+
+/**
+ * vcn_v4_0_3_is_idle - check VCN block is idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Check whether VCN block is idle
+ */
+static bool vcn_v4_0_3_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
+ UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v4_0_3_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
+ UVD_STATUS__IDLE, UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
+ *
+ * @handle: amdgpu_device pointer
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v4_0_3_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i),
+ regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v4_0_3_enable_clock_gating(adev, i);
+ } else {
+ vcn_v4_0_3_disable_clock_gating(adev, i);
+ }
+ }
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
+ *
+ * @handle: amdgpu_device pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+static int vcn_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v4_0_3_stop(adev);
+ else
+ ret = vcn_v4_0_3_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * Set VCN block interrupt state
+ */
+static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
+ .set = vcn_v4_0_3_set_interrupt_state,
+ .process = vcn_v4_0_3_process_interrupt,
+};
+
+/**
+ * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst->irq.num_types++;
+ }
+ adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
+ .name = "vcn_v4_0_3",
+ .early_init = vcn_v4_0_3_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v4_0_3_sw_init,
+ .sw_fini = vcn_v4_0_3_sw_fini,
+ .hw_init = vcn_v4_0_3_hw_init,
+ .hw_fini = vcn_v4_0_3_hw_fini,
+ .suspend = vcn_v4_0_3_suspend,
+ .resume = vcn_v4_0_3_resume,
+ .is_idle = vcn_v4_0_3_is_idle,
+ .wait_for_idle = vcn_v4_0_3_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
+ .set_powergating_state = vcn_v4_0_3_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 4,
+ .minor = 0,
+ .rev = 3,
+ .funcs = &vcn_v4_0_3_ip_funcs,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
+ {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
+};
+
+static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ /* vcn v4_0_3 only support query uncorrectable errors */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ vcn_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+ NULL, 0, GET_INST(VCN, vcn_inst),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ dev_warn(adev->dev, "VCN RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ vcn_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+ GET_INST(VCN, vcn_inst));
+}
+
+static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ dev_warn(adev->dev, "VCN RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
+ .query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
+};
+
+static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v4_0_3_ras_hw_ops,
+ },
+};
+
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ras = &vcn_v4_0_3_ras;
+}
+
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect)
+{
+ uint32_t tmp;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
new file mode 100644
index 000000000000..0b046114373a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_3_H__
+#define __VCN_V4_0_3_H__
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block;
+
+#endif /* __VCN_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 536128447b71..4d719df376a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -334,7 +334,8 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index));
/* Enable IH Retry CAM */
- if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0))
+ if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0) ||
+ adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))
WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
ENABLE, 1);
else
@@ -526,6 +527,7 @@ static int vega20_ih_early_init(void *handle)
static int vega20_ih_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool use_bus_addr = true;
int r;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -533,14 +535,18 @@ static int vega20_ih_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
+ if ((adev->flags & AMD_IS_APU) &&
+ (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
+ use_bus_addr = false;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
if (r)
return r;
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, use_bus_addr);
if (r)
return r;
@@ -559,7 +565,7 @@ static int vega20_ih_sw_init(void *handle)
/* initialize ih control registers offset */
vega20_ih_init_register_offset(adev);
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, use_bus_addr);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index c0360dbebd4b..6a8494f98d3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -587,11 +587,6 @@ void vi_srbm_select(struct amdgpu_device *adev,
WREG32(mmSRBM_GFX_CNTL, srbm_gfx_cntl);
}
-static void vi_vga_set_state(struct amdgpu_device *adev, bool state)
-{
- /* todo */
-}
-
static bool vi_read_disabled_bios(struct amdgpu_device *adev)
{
u32 bus_cntl;
@@ -769,12 +764,12 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff)
- amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
return val;
} else {
@@ -1442,7 +1437,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.read_register = &vi_read_register,
.reset = &vi_asic_reset,
.reset_method = &vi_asic_reset_method,
- .set_vga_state = &vi_vga_set_state,
.get_xclk = &vi_get_xclk,
.set_uvd_clocks = &vi_set_uvd_clocks,
.set_vce_clocks = &vi_set_vce_clocks,